node-html-parser 3.3.2 → 3.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/nodes/html.js +77 -20
- package/dist/esm/nodes/text.js +35 -0
- package/dist/main.js +116 -20
- package/dist/nodes/html.d.ts +1 -1
- package/dist/nodes/html.js +77 -20
- package/dist/nodes/text.d.ts +5 -0
- package/dist/nodes/text.js +39 -0
- package/package.json +4 -3
package/dist/esm/nodes/html.js
CHANGED
|
@@ -11,23 +11,80 @@ function decode(val) {
|
|
|
11
11
|
// clone string
|
|
12
12
|
return JSON.parse(JSON.stringify(he.decode(val)));
|
|
13
13
|
}
|
|
14
|
-
|
|
15
|
-
kBlockElements
|
|
16
|
-
kBlockElements.
|
|
17
|
-
kBlockElements.
|
|
18
|
-
kBlockElements.
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
kBlockElements.
|
|
22
|
-
kBlockElements.
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
kBlockElements.
|
|
26
|
-
kBlockElements.
|
|
27
|
-
kBlockElements.
|
|
28
|
-
kBlockElements.
|
|
29
|
-
kBlockElements.
|
|
30
|
-
kBlockElements.
|
|
14
|
+
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
|
|
15
|
+
const kBlockElements = new Set();
|
|
16
|
+
kBlockElements.add('address');
|
|
17
|
+
kBlockElements.add('ADDRESS');
|
|
18
|
+
kBlockElements.add('article');
|
|
19
|
+
kBlockElements.add('ARTICLE');
|
|
20
|
+
kBlockElements.add('aside');
|
|
21
|
+
kBlockElements.add('ASIDE');
|
|
22
|
+
kBlockElements.add('blockquote');
|
|
23
|
+
kBlockElements.add('BLOCKQUOTE');
|
|
24
|
+
kBlockElements.add('br');
|
|
25
|
+
kBlockElements.add('BR');
|
|
26
|
+
kBlockElements.add('details');
|
|
27
|
+
kBlockElements.add('DETAILS');
|
|
28
|
+
kBlockElements.add('dialog');
|
|
29
|
+
kBlockElements.add('DIALOG');
|
|
30
|
+
kBlockElements.add('dd');
|
|
31
|
+
kBlockElements.add('DD');
|
|
32
|
+
kBlockElements.add('div');
|
|
33
|
+
kBlockElements.add('DIV');
|
|
34
|
+
kBlockElements.add('dl');
|
|
35
|
+
kBlockElements.add('DL');
|
|
36
|
+
kBlockElements.add('dt');
|
|
37
|
+
kBlockElements.add('DT');
|
|
38
|
+
kBlockElements.add('fieldset');
|
|
39
|
+
kBlockElements.add('FIELDSET');
|
|
40
|
+
kBlockElements.add('figcaption');
|
|
41
|
+
kBlockElements.add('FIGCAPTION');
|
|
42
|
+
kBlockElements.add('figure');
|
|
43
|
+
kBlockElements.add('FIGURE');
|
|
44
|
+
kBlockElements.add('footer');
|
|
45
|
+
kBlockElements.add('FOOTER');
|
|
46
|
+
kBlockElements.add('form');
|
|
47
|
+
kBlockElements.add('FORM');
|
|
48
|
+
kBlockElements.add('h1');
|
|
49
|
+
kBlockElements.add('H1');
|
|
50
|
+
kBlockElements.add('h2');
|
|
51
|
+
kBlockElements.add('H2');
|
|
52
|
+
kBlockElements.add('h3');
|
|
53
|
+
kBlockElements.add('H3');
|
|
54
|
+
kBlockElements.add('h4');
|
|
55
|
+
kBlockElements.add('H4');
|
|
56
|
+
kBlockElements.add('h5');
|
|
57
|
+
kBlockElements.add('H5');
|
|
58
|
+
kBlockElements.add('h6');
|
|
59
|
+
kBlockElements.add('H6');
|
|
60
|
+
kBlockElements.add('header');
|
|
61
|
+
kBlockElements.add('HEADER');
|
|
62
|
+
kBlockElements.add('hgroup');
|
|
63
|
+
kBlockElements.add('HGROUP');
|
|
64
|
+
kBlockElements.add('hr');
|
|
65
|
+
kBlockElements.add('HR');
|
|
66
|
+
kBlockElements.add('li');
|
|
67
|
+
kBlockElements.add('LI');
|
|
68
|
+
kBlockElements.add('main');
|
|
69
|
+
kBlockElements.add('MAIN');
|
|
70
|
+
kBlockElements.add('nav');
|
|
71
|
+
kBlockElements.add('NAV');
|
|
72
|
+
kBlockElements.add('ol');
|
|
73
|
+
kBlockElements.add('OL');
|
|
74
|
+
kBlockElements.add('p');
|
|
75
|
+
kBlockElements.add('P');
|
|
76
|
+
kBlockElements.add('pre');
|
|
77
|
+
kBlockElements.add('PRE');
|
|
78
|
+
kBlockElements.add('section');
|
|
79
|
+
kBlockElements.add('SECTION');
|
|
80
|
+
kBlockElements.add('table');
|
|
81
|
+
kBlockElements.add('TABLE');
|
|
82
|
+
kBlockElements.add('td');
|
|
83
|
+
kBlockElements.add('TD');
|
|
84
|
+
kBlockElements.add('tr');
|
|
85
|
+
kBlockElements.add('TR');
|
|
86
|
+
kBlockElements.add('ul');
|
|
87
|
+
kBlockElements.add('UL');
|
|
31
88
|
class DOMTokenList {
|
|
32
89
|
constructor(valuesInit = [], afterUpdate = (() => null)) {
|
|
33
90
|
this._set = new Set(valuesInit);
|
|
@@ -207,7 +264,7 @@ export default class HTMLElement extends Node {
|
|
|
207
264
|
const blocks = [currentBlock];
|
|
208
265
|
function dfs(node) {
|
|
209
266
|
if (node.nodeType === NodeType.ELEMENT_NODE) {
|
|
210
|
-
if (kBlockElements.
|
|
267
|
+
if (kBlockElements.has(node.rawTagName)) {
|
|
211
268
|
if (currentBlock.length > 0) {
|
|
212
269
|
blocks.push(currentBlock = []);
|
|
213
270
|
}
|
|
@@ -226,7 +283,7 @@ export default class HTMLElement extends Node {
|
|
|
226
283
|
currentBlock.prependWhitespace = true;
|
|
227
284
|
}
|
|
228
285
|
else {
|
|
229
|
-
let text = node.
|
|
286
|
+
let text = node.trimmedText;
|
|
230
287
|
if (currentBlock.prependWhitespace) {
|
|
231
288
|
text = ` ${text}`;
|
|
232
289
|
currentBlock.prependWhitespace = false;
|
|
@@ -363,7 +420,7 @@ export default class HTMLElement extends Node {
|
|
|
363
420
|
if (node.isWhitespace) {
|
|
364
421
|
return;
|
|
365
422
|
}
|
|
366
|
-
node.rawText = node.
|
|
423
|
+
node.rawText = node.trimmedText;
|
|
367
424
|
}
|
|
368
425
|
else if (node.nodeType === NodeType.ELEMENT_NODE) {
|
|
369
426
|
node.removeWhitespace();
|
package/dist/esm/nodes/text.js
CHANGED
|
@@ -14,6 +14,41 @@ export default class TextNode extends Node {
|
|
|
14
14
|
*/
|
|
15
15
|
this.nodeType = NodeType.TEXT_NODE;
|
|
16
16
|
}
|
|
17
|
+
/**
|
|
18
|
+
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
19
|
+
*/
|
|
20
|
+
get trimmedText() {
|
|
21
|
+
if (this._trimmedText !== undefined)
|
|
22
|
+
return this._trimmedText;
|
|
23
|
+
const text = this.rawText;
|
|
24
|
+
let i = 0;
|
|
25
|
+
let startPos;
|
|
26
|
+
let endPos;
|
|
27
|
+
while (i >= 0 && i < text.length) {
|
|
28
|
+
if (/\S/.test(text[i])) {
|
|
29
|
+
if (startPos === undefined) {
|
|
30
|
+
startPos = i;
|
|
31
|
+
i = text.length;
|
|
32
|
+
}
|
|
33
|
+
else {
|
|
34
|
+
endPos = i;
|
|
35
|
+
i = void 0;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
if (startPos === undefined)
|
|
39
|
+
i++;
|
|
40
|
+
else
|
|
41
|
+
i--;
|
|
42
|
+
}
|
|
43
|
+
if (startPos === undefined)
|
|
44
|
+
startPos = 0;
|
|
45
|
+
if (endPos === undefined)
|
|
46
|
+
endPos = text.length - 1;
|
|
47
|
+
const hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
|
|
48
|
+
const hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
|
|
49
|
+
this._trimmedText = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
|
|
50
|
+
return this._trimmedText;
|
|
51
|
+
}
|
|
17
52
|
/**
|
|
18
53
|
* Get unescaped text value of current node and its children.
|
|
19
54
|
* @return {string} text content
|
package/dist/main.js
CHANGED
|
@@ -72,6 +72,45 @@ define("nodes/text", ["require", "exports", "nodes/type", "nodes/node"], functio
|
|
|
72
72
|
_this.nodeType = type_1.default.TEXT_NODE;
|
|
73
73
|
return _this;
|
|
74
74
|
}
|
|
75
|
+
Object.defineProperty(TextNode.prototype, "trimmedText", {
|
|
76
|
+
/**
|
|
77
|
+
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
78
|
+
*/
|
|
79
|
+
get: function () {
|
|
80
|
+
if (this._trimmedText !== undefined)
|
|
81
|
+
return this._trimmedText;
|
|
82
|
+
var text = this.rawText;
|
|
83
|
+
var i = 0;
|
|
84
|
+
var startPos;
|
|
85
|
+
var endPos;
|
|
86
|
+
while (i >= 0 && i < text.length) {
|
|
87
|
+
if (/\S/.test(text[i])) {
|
|
88
|
+
if (startPos === undefined) {
|
|
89
|
+
startPos = i;
|
|
90
|
+
i = text.length;
|
|
91
|
+
}
|
|
92
|
+
else {
|
|
93
|
+
endPos = i;
|
|
94
|
+
i = void 0;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
if (startPos === undefined)
|
|
98
|
+
i++;
|
|
99
|
+
else
|
|
100
|
+
i--;
|
|
101
|
+
}
|
|
102
|
+
if (startPos === undefined)
|
|
103
|
+
startPos = 0;
|
|
104
|
+
if (endPos === undefined)
|
|
105
|
+
endPos = text.length - 1;
|
|
106
|
+
var hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
|
|
107
|
+
var hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
|
|
108
|
+
this._trimmedText = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
|
|
109
|
+
return this._trimmedText;
|
|
110
|
+
},
|
|
111
|
+
enumerable: false,
|
|
112
|
+
configurable: true
|
|
113
|
+
});
|
|
75
114
|
Object.defineProperty(TextNode.prototype, "text", {
|
|
76
115
|
/**
|
|
77
116
|
* Get unescaped text value of current node and its children.
|
|
@@ -222,23 +261,80 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
222
261
|
// clone string
|
|
223
262
|
return JSON.parse(JSON.stringify(he_1.default.decode(val)));
|
|
224
263
|
}
|
|
225
|
-
|
|
226
|
-
kBlockElements
|
|
227
|
-
kBlockElements.
|
|
228
|
-
kBlockElements.
|
|
229
|
-
kBlockElements.
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
kBlockElements.
|
|
233
|
-
kBlockElements.
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
kBlockElements.
|
|
237
|
-
kBlockElements.
|
|
238
|
-
kBlockElements.
|
|
239
|
-
kBlockElements.
|
|
240
|
-
kBlockElements.
|
|
241
|
-
kBlockElements.
|
|
264
|
+
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
|
|
265
|
+
var kBlockElements = new Set();
|
|
266
|
+
kBlockElements.add('address');
|
|
267
|
+
kBlockElements.add('ADDRESS');
|
|
268
|
+
kBlockElements.add('article');
|
|
269
|
+
kBlockElements.add('ARTICLE');
|
|
270
|
+
kBlockElements.add('aside');
|
|
271
|
+
kBlockElements.add('ASIDE');
|
|
272
|
+
kBlockElements.add('blockquote');
|
|
273
|
+
kBlockElements.add('BLOCKQUOTE');
|
|
274
|
+
kBlockElements.add('br');
|
|
275
|
+
kBlockElements.add('BR');
|
|
276
|
+
kBlockElements.add('details');
|
|
277
|
+
kBlockElements.add('DETAILS');
|
|
278
|
+
kBlockElements.add('dialog');
|
|
279
|
+
kBlockElements.add('DIALOG');
|
|
280
|
+
kBlockElements.add('dd');
|
|
281
|
+
kBlockElements.add('DD');
|
|
282
|
+
kBlockElements.add('div');
|
|
283
|
+
kBlockElements.add('DIV');
|
|
284
|
+
kBlockElements.add('dl');
|
|
285
|
+
kBlockElements.add('DL');
|
|
286
|
+
kBlockElements.add('dt');
|
|
287
|
+
kBlockElements.add('DT');
|
|
288
|
+
kBlockElements.add('fieldset');
|
|
289
|
+
kBlockElements.add('FIELDSET');
|
|
290
|
+
kBlockElements.add('figcaption');
|
|
291
|
+
kBlockElements.add('FIGCAPTION');
|
|
292
|
+
kBlockElements.add('figure');
|
|
293
|
+
kBlockElements.add('FIGURE');
|
|
294
|
+
kBlockElements.add('footer');
|
|
295
|
+
kBlockElements.add('FOOTER');
|
|
296
|
+
kBlockElements.add('form');
|
|
297
|
+
kBlockElements.add('FORM');
|
|
298
|
+
kBlockElements.add('h1');
|
|
299
|
+
kBlockElements.add('H1');
|
|
300
|
+
kBlockElements.add('h2');
|
|
301
|
+
kBlockElements.add('H2');
|
|
302
|
+
kBlockElements.add('h3');
|
|
303
|
+
kBlockElements.add('H3');
|
|
304
|
+
kBlockElements.add('h4');
|
|
305
|
+
kBlockElements.add('H4');
|
|
306
|
+
kBlockElements.add('h5');
|
|
307
|
+
kBlockElements.add('H5');
|
|
308
|
+
kBlockElements.add('h6');
|
|
309
|
+
kBlockElements.add('H6');
|
|
310
|
+
kBlockElements.add('header');
|
|
311
|
+
kBlockElements.add('HEADER');
|
|
312
|
+
kBlockElements.add('hgroup');
|
|
313
|
+
kBlockElements.add('HGROUP');
|
|
314
|
+
kBlockElements.add('hr');
|
|
315
|
+
kBlockElements.add('HR');
|
|
316
|
+
kBlockElements.add('li');
|
|
317
|
+
kBlockElements.add('LI');
|
|
318
|
+
kBlockElements.add('main');
|
|
319
|
+
kBlockElements.add('MAIN');
|
|
320
|
+
kBlockElements.add('nav');
|
|
321
|
+
kBlockElements.add('NAV');
|
|
322
|
+
kBlockElements.add('ol');
|
|
323
|
+
kBlockElements.add('OL');
|
|
324
|
+
kBlockElements.add('p');
|
|
325
|
+
kBlockElements.add('P');
|
|
326
|
+
kBlockElements.add('pre');
|
|
327
|
+
kBlockElements.add('PRE');
|
|
328
|
+
kBlockElements.add('section');
|
|
329
|
+
kBlockElements.add('SECTION');
|
|
330
|
+
kBlockElements.add('table');
|
|
331
|
+
kBlockElements.add('TABLE');
|
|
332
|
+
kBlockElements.add('td');
|
|
333
|
+
kBlockElements.add('TD');
|
|
334
|
+
kBlockElements.add('tr');
|
|
335
|
+
kBlockElements.add('TR');
|
|
336
|
+
kBlockElements.add('ul');
|
|
337
|
+
kBlockElements.add('UL');
|
|
242
338
|
var DOMTokenList = /** @class */ (function () {
|
|
243
339
|
function DOMTokenList(valuesInit, afterUpdate) {
|
|
244
340
|
if (valuesInit === void 0) { valuesInit = []; }
|
|
@@ -454,7 +550,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
454
550
|
var blocks = [currentBlock];
|
|
455
551
|
function dfs(node) {
|
|
456
552
|
if (node.nodeType === type_3.default.ELEMENT_NODE) {
|
|
457
|
-
if (kBlockElements.
|
|
553
|
+
if (kBlockElements.has(node.rawTagName)) {
|
|
458
554
|
if (currentBlock.length > 0) {
|
|
459
555
|
blocks.push(currentBlock = []);
|
|
460
556
|
}
|
|
@@ -473,7 +569,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
473
569
|
currentBlock.prependWhitespace = true;
|
|
474
570
|
}
|
|
475
571
|
else {
|
|
476
|
-
var text = node.
|
|
572
|
+
var text = node.trimmedText;
|
|
477
573
|
if (currentBlock.prependWhitespace) {
|
|
478
574
|
text = " " + text;
|
|
479
575
|
currentBlock.prependWhitespace = false;
|
|
@@ -628,7 +724,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
628
724
|
if (node.isWhitespace) {
|
|
629
725
|
return;
|
|
630
726
|
}
|
|
631
|
-
node.rawText = node.
|
|
727
|
+
node.rawText = node.trimmedText;
|
|
632
728
|
}
|
|
633
729
|
else if (node.nodeType === type_3.default.ELEMENT_NODE) {
|
|
634
730
|
node.removeWhitespace();
|
package/dist/nodes/html.d.ts
CHANGED
|
@@ -181,7 +181,7 @@ export default class HTMLElement extends Node {
|
|
|
181
181
|
setAttributes(attributes: Attributes): void;
|
|
182
182
|
insertAdjacentHTML(where: InsertPosition, html: string): void;
|
|
183
183
|
get nextSibling(): Node;
|
|
184
|
-
get nextElementSibling():
|
|
184
|
+
get nextElementSibling(): HTMLElement;
|
|
185
185
|
get classNames(): string;
|
|
186
186
|
}
|
|
187
187
|
export interface Options {
|
package/dist/nodes/html.js
CHANGED
|
@@ -48,23 +48,80 @@ function decode(val) {
|
|
|
48
48
|
// clone string
|
|
49
49
|
return JSON.parse(JSON.stringify(he_1.default.decode(val)));
|
|
50
50
|
}
|
|
51
|
-
|
|
52
|
-
kBlockElements
|
|
53
|
-
kBlockElements.
|
|
54
|
-
kBlockElements.
|
|
55
|
-
kBlockElements.
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
kBlockElements.
|
|
59
|
-
kBlockElements.
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
kBlockElements.
|
|
63
|
-
kBlockElements.
|
|
64
|
-
kBlockElements.
|
|
65
|
-
kBlockElements.
|
|
66
|
-
kBlockElements.
|
|
67
|
-
kBlockElements.
|
|
51
|
+
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
|
|
52
|
+
var kBlockElements = new Set();
|
|
53
|
+
kBlockElements.add('address');
|
|
54
|
+
kBlockElements.add('ADDRESS');
|
|
55
|
+
kBlockElements.add('article');
|
|
56
|
+
kBlockElements.add('ARTICLE');
|
|
57
|
+
kBlockElements.add('aside');
|
|
58
|
+
kBlockElements.add('ASIDE');
|
|
59
|
+
kBlockElements.add('blockquote');
|
|
60
|
+
kBlockElements.add('BLOCKQUOTE');
|
|
61
|
+
kBlockElements.add('br');
|
|
62
|
+
kBlockElements.add('BR');
|
|
63
|
+
kBlockElements.add('details');
|
|
64
|
+
kBlockElements.add('DETAILS');
|
|
65
|
+
kBlockElements.add('dialog');
|
|
66
|
+
kBlockElements.add('DIALOG');
|
|
67
|
+
kBlockElements.add('dd');
|
|
68
|
+
kBlockElements.add('DD');
|
|
69
|
+
kBlockElements.add('div');
|
|
70
|
+
kBlockElements.add('DIV');
|
|
71
|
+
kBlockElements.add('dl');
|
|
72
|
+
kBlockElements.add('DL');
|
|
73
|
+
kBlockElements.add('dt');
|
|
74
|
+
kBlockElements.add('DT');
|
|
75
|
+
kBlockElements.add('fieldset');
|
|
76
|
+
kBlockElements.add('FIELDSET');
|
|
77
|
+
kBlockElements.add('figcaption');
|
|
78
|
+
kBlockElements.add('FIGCAPTION');
|
|
79
|
+
kBlockElements.add('figure');
|
|
80
|
+
kBlockElements.add('FIGURE');
|
|
81
|
+
kBlockElements.add('footer');
|
|
82
|
+
kBlockElements.add('FOOTER');
|
|
83
|
+
kBlockElements.add('form');
|
|
84
|
+
kBlockElements.add('FORM');
|
|
85
|
+
kBlockElements.add('h1');
|
|
86
|
+
kBlockElements.add('H1');
|
|
87
|
+
kBlockElements.add('h2');
|
|
88
|
+
kBlockElements.add('H2');
|
|
89
|
+
kBlockElements.add('h3');
|
|
90
|
+
kBlockElements.add('H3');
|
|
91
|
+
kBlockElements.add('h4');
|
|
92
|
+
kBlockElements.add('H4');
|
|
93
|
+
kBlockElements.add('h5');
|
|
94
|
+
kBlockElements.add('H5');
|
|
95
|
+
kBlockElements.add('h6');
|
|
96
|
+
kBlockElements.add('H6');
|
|
97
|
+
kBlockElements.add('header');
|
|
98
|
+
kBlockElements.add('HEADER');
|
|
99
|
+
kBlockElements.add('hgroup');
|
|
100
|
+
kBlockElements.add('HGROUP');
|
|
101
|
+
kBlockElements.add('hr');
|
|
102
|
+
kBlockElements.add('HR');
|
|
103
|
+
kBlockElements.add('li');
|
|
104
|
+
kBlockElements.add('LI');
|
|
105
|
+
kBlockElements.add('main');
|
|
106
|
+
kBlockElements.add('MAIN');
|
|
107
|
+
kBlockElements.add('nav');
|
|
108
|
+
kBlockElements.add('NAV');
|
|
109
|
+
kBlockElements.add('ol');
|
|
110
|
+
kBlockElements.add('OL');
|
|
111
|
+
kBlockElements.add('p');
|
|
112
|
+
kBlockElements.add('P');
|
|
113
|
+
kBlockElements.add('pre');
|
|
114
|
+
kBlockElements.add('PRE');
|
|
115
|
+
kBlockElements.add('section');
|
|
116
|
+
kBlockElements.add('SECTION');
|
|
117
|
+
kBlockElements.add('table');
|
|
118
|
+
kBlockElements.add('TABLE');
|
|
119
|
+
kBlockElements.add('td');
|
|
120
|
+
kBlockElements.add('TD');
|
|
121
|
+
kBlockElements.add('tr');
|
|
122
|
+
kBlockElements.add('TR');
|
|
123
|
+
kBlockElements.add('ul');
|
|
124
|
+
kBlockElements.add('UL');
|
|
68
125
|
var DOMTokenList = /** @class */ (function () {
|
|
69
126
|
function DOMTokenList(valuesInit, afterUpdate) {
|
|
70
127
|
if (valuesInit === void 0) { valuesInit = []; }
|
|
@@ -280,7 +337,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
280
337
|
var blocks = [currentBlock];
|
|
281
338
|
function dfs(node) {
|
|
282
339
|
if (node.nodeType === type_1.default.ELEMENT_NODE) {
|
|
283
|
-
if (kBlockElements.
|
|
340
|
+
if (kBlockElements.has(node.rawTagName)) {
|
|
284
341
|
if (currentBlock.length > 0) {
|
|
285
342
|
blocks.push(currentBlock = []);
|
|
286
343
|
}
|
|
@@ -299,7 +356,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
299
356
|
currentBlock.prependWhitespace = true;
|
|
300
357
|
}
|
|
301
358
|
else {
|
|
302
|
-
var text = node.
|
|
359
|
+
var text = node.trimmedText;
|
|
303
360
|
if (currentBlock.prependWhitespace) {
|
|
304
361
|
text = " " + text;
|
|
305
362
|
currentBlock.prependWhitespace = false;
|
|
@@ -454,7 +511,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
454
511
|
if (node.isWhitespace) {
|
|
455
512
|
return;
|
|
456
513
|
}
|
|
457
|
-
node.rawText = node.
|
|
514
|
+
node.rawText = node.trimmedText;
|
|
458
515
|
}
|
|
459
516
|
else if (node.nodeType === type_1.default.ELEMENT_NODE) {
|
|
460
517
|
node.removeWhitespace();
|
package/dist/nodes/text.d.ts
CHANGED
|
@@ -13,6 +13,11 @@ export default class TextNode extends Node {
|
|
|
13
13
|
* @type {Number}
|
|
14
14
|
*/
|
|
15
15
|
nodeType: NodeType;
|
|
16
|
+
private _trimmedText?;
|
|
17
|
+
/**
|
|
18
|
+
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
19
|
+
*/
|
|
20
|
+
get trimmedText(): string;
|
|
16
21
|
/**
|
|
17
22
|
* Get unescaped text value of current node and its children.
|
|
18
23
|
* @return {string} text content
|
package/dist/nodes/text.js
CHANGED
|
@@ -36,6 +36,45 @@ var TextNode = /** @class */ (function (_super) {
|
|
|
36
36
|
_this.nodeType = type_1.default.TEXT_NODE;
|
|
37
37
|
return _this;
|
|
38
38
|
}
|
|
39
|
+
Object.defineProperty(TextNode.prototype, "trimmedText", {
|
|
40
|
+
/**
|
|
41
|
+
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
42
|
+
*/
|
|
43
|
+
get: function () {
|
|
44
|
+
if (this._trimmedText !== undefined)
|
|
45
|
+
return this._trimmedText;
|
|
46
|
+
var text = this.rawText;
|
|
47
|
+
var i = 0;
|
|
48
|
+
var startPos;
|
|
49
|
+
var endPos;
|
|
50
|
+
while (i >= 0 && i < text.length) {
|
|
51
|
+
if (/\S/.test(text[i])) {
|
|
52
|
+
if (startPos === undefined) {
|
|
53
|
+
startPos = i;
|
|
54
|
+
i = text.length;
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
endPos = i;
|
|
58
|
+
i = void 0;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
if (startPos === undefined)
|
|
62
|
+
i++;
|
|
63
|
+
else
|
|
64
|
+
i--;
|
|
65
|
+
}
|
|
66
|
+
if (startPos === undefined)
|
|
67
|
+
startPos = 0;
|
|
68
|
+
if (endPos === undefined)
|
|
69
|
+
endPos = text.length - 1;
|
|
70
|
+
var hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
|
|
71
|
+
var hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
|
|
72
|
+
this._trimmedText = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
|
|
73
|
+
return this._trimmedText;
|
|
74
|
+
},
|
|
75
|
+
enumerable: false,
|
|
76
|
+
configurable: true
|
|
77
|
+
});
|
|
39
78
|
Object.defineProperty(TextNode.prototype, "text", {
|
|
40
79
|
/**
|
|
41
80
|
* Get unescaped text value of current node and its children.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-html-parser",
|
|
3
|
-
"version": "3.3.
|
|
3
|
+
"version": "3.3.6",
|
|
4
4
|
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/esm/index.js",
|
|
@@ -15,7 +15,8 @@
|
|
|
15
15
|
"build": "npm run lint && npm run clean && npm run ts:cjs && npm run ts:amd && npm run ts:esm",
|
|
16
16
|
"dev": "tsc -w & mocha -w ./test/*.js",
|
|
17
17
|
"pretest": "tsc -m commonjs",
|
|
18
|
-
"release": "yarn build && np"
|
|
18
|
+
"release": "yarn build && np",
|
|
19
|
+
"prepare": "npm run build"
|
|
19
20
|
},
|
|
20
21
|
"keywords": [
|
|
21
22
|
"parser",
|
|
@@ -32,7 +33,7 @@
|
|
|
32
33
|
"registry": "https://registry.npmjs.org"
|
|
33
34
|
},
|
|
34
35
|
"dependencies": {
|
|
35
|
-
"css-select": "^
|
|
36
|
+
"css-select": "^4.1.3",
|
|
36
37
|
"he": "1.2.0"
|
|
37
38
|
},
|
|
38
39
|
"devDependencies": {
|