node-html-parser 3.3.1 → 3.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/nodes/html.js +3 -3
- package/dist/esm/nodes/text.js +35 -0
- package/dist/main.js +42 -3
- package/dist/nodes/html.d.ts +1 -1
- package/dist/nodes/html.js +3 -3
- package/dist/nodes/text.d.ts +5 -0
- package/dist/nodes/text.js +39 -0
- package/package.json +4 -3
package/dist/esm/nodes/html.js
CHANGED
|
@@ -226,7 +226,7 @@ export default class HTMLElement extends Node {
|
|
|
226
226
|
currentBlock.prependWhitespace = true;
|
|
227
227
|
}
|
|
228
228
|
else {
|
|
229
|
-
let text = node.
|
|
229
|
+
let text = node.trimmedText;
|
|
230
230
|
if (currentBlock.prependWhitespace) {
|
|
231
231
|
text = ` ${text}`;
|
|
232
232
|
currentBlock.prependWhitespace = false;
|
|
@@ -238,7 +238,7 @@ export default class HTMLElement extends Node {
|
|
|
238
238
|
dfs(this);
|
|
239
239
|
return blocks.map((block) => {
|
|
240
240
|
// Normalize each line's whitespace
|
|
241
|
-
return block.join('').trim().replace(/\s{2,}/g, ' ');
|
|
241
|
+
return block.join('').replace(/\s{2,}/g, ' ');
|
|
242
242
|
})
|
|
243
243
|
.join('\n').replace(/\s+$/, ''); // trimRight;
|
|
244
244
|
}
|
|
@@ -363,7 +363,7 @@ export default class HTMLElement extends Node {
|
|
|
363
363
|
if (node.isWhitespace) {
|
|
364
364
|
return;
|
|
365
365
|
}
|
|
366
|
-
node.rawText = node.rawText.trim();
|
|
366
|
+
node.rawText = node.trimmedText;
|
|
367
367
|
}
|
|
368
368
|
else if (node.nodeType === NodeType.ELEMENT_NODE) {
|
|
369
369
|
node.removeWhitespace();
|
package/dist/esm/nodes/text.js
CHANGED
|
@@ -14,6 +14,41 @@ export default class TextNode extends Node {
|
|
|
14
14
|
*/
|
|
15
15
|
this.nodeType = NodeType.TEXT_NODE;
|
|
16
16
|
}
|
|
17
|
+
/**
|
|
18
|
+
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
19
|
+
*/
|
|
20
|
+
get trimmedText() {
|
|
21
|
+
if (this._trimmedText !== undefined)
|
|
22
|
+
return this._trimmedText;
|
|
23
|
+
const text = this.rawText;
|
|
24
|
+
let i = 0;
|
|
25
|
+
let startPos;
|
|
26
|
+
let endPos;
|
|
27
|
+
while (i >= 0 && i < text.length) {
|
|
28
|
+
if (/\S/.test(text[i])) {
|
|
29
|
+
if (startPos === undefined) {
|
|
30
|
+
startPos = i;
|
|
31
|
+
i = text.length;
|
|
32
|
+
}
|
|
33
|
+
else {
|
|
34
|
+
endPos = i;
|
|
35
|
+
i = void 0;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
if (startPos === undefined)
|
|
39
|
+
i++;
|
|
40
|
+
else
|
|
41
|
+
i--;
|
|
42
|
+
}
|
|
43
|
+
if (startPos === undefined)
|
|
44
|
+
startPos = 0;
|
|
45
|
+
if (endPos === undefined)
|
|
46
|
+
endPos = text.length - 1;
|
|
47
|
+
const hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
|
|
48
|
+
const hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
|
|
49
|
+
this._trimmedText = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
|
|
50
|
+
return this._trimmedText;
|
|
51
|
+
}
|
|
17
52
|
/**
|
|
18
53
|
* Get unescaped text value of current node and its children.
|
|
19
54
|
* @return {string} text content
|
package/dist/main.js
CHANGED
|
@@ -72,6 +72,45 @@ define("nodes/text", ["require", "exports", "nodes/type", "nodes/node"], functio
|
|
|
72
72
|
_this.nodeType = type_1.default.TEXT_NODE;
|
|
73
73
|
return _this;
|
|
74
74
|
}
|
|
75
|
+
Object.defineProperty(TextNode.prototype, "trimmedText", {
|
|
76
|
+
/**
|
|
77
|
+
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
78
|
+
*/
|
|
79
|
+
get: function () {
|
|
80
|
+
if (this._trimmedText !== undefined)
|
|
81
|
+
return this._trimmedText;
|
|
82
|
+
var text = this.rawText;
|
|
83
|
+
var i = 0;
|
|
84
|
+
var startPos;
|
|
85
|
+
var endPos;
|
|
86
|
+
while (i >= 0 && i < text.length) {
|
|
87
|
+
if (/\S/.test(text[i])) {
|
|
88
|
+
if (startPos === undefined) {
|
|
89
|
+
startPos = i;
|
|
90
|
+
i = text.length;
|
|
91
|
+
}
|
|
92
|
+
else {
|
|
93
|
+
endPos = i;
|
|
94
|
+
i = void 0;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
if (startPos === undefined)
|
|
98
|
+
i++;
|
|
99
|
+
else
|
|
100
|
+
i--;
|
|
101
|
+
}
|
|
102
|
+
if (startPos === undefined)
|
|
103
|
+
startPos = 0;
|
|
104
|
+
if (endPos === undefined)
|
|
105
|
+
endPos = text.length - 1;
|
|
106
|
+
var hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
|
|
107
|
+
var hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
|
|
108
|
+
this._trimmedText = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
|
|
109
|
+
return this._trimmedText;
|
|
110
|
+
},
|
|
111
|
+
enumerable: false,
|
|
112
|
+
configurable: true
|
|
113
|
+
});
|
|
75
114
|
Object.defineProperty(TextNode.prototype, "text", {
|
|
76
115
|
/**
|
|
77
116
|
* Get unescaped text value of current node and its children.
|
|
@@ -473,7 +512,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
473
512
|
currentBlock.prependWhitespace = true;
|
|
474
513
|
}
|
|
475
514
|
else {
|
|
476
|
-
var text = node.
|
|
515
|
+
var text = node.trimmedText;
|
|
477
516
|
if (currentBlock.prependWhitespace) {
|
|
478
517
|
text = " " + text;
|
|
479
518
|
currentBlock.prependWhitespace = false;
|
|
@@ -485,7 +524,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
485
524
|
dfs(this);
|
|
486
525
|
return blocks.map(function (block) {
|
|
487
526
|
// Normalize each line's whitespace
|
|
488
|
-
return block.join('').
|
|
527
|
+
return block.join('').replace(/\s{2,}/g, ' ');
|
|
489
528
|
})
|
|
490
529
|
.join('\n').replace(/\s+$/, ''); // trimRight;
|
|
491
530
|
},
|
|
@@ -628,7 +667,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
628
667
|
if (node.isWhitespace) {
|
|
629
668
|
return;
|
|
630
669
|
}
|
|
631
|
-
node.rawText = node.
|
|
670
|
+
node.rawText = node.trimmedText;
|
|
632
671
|
}
|
|
633
672
|
else if (node.nodeType === type_3.default.ELEMENT_NODE) {
|
|
634
673
|
node.removeWhitespace();
|
package/dist/nodes/html.d.ts
CHANGED
|
@@ -181,7 +181,7 @@ export default class HTMLElement extends Node {
|
|
|
181
181
|
setAttributes(attributes: Attributes): void;
|
|
182
182
|
insertAdjacentHTML(where: InsertPosition, html: string): void;
|
|
183
183
|
get nextSibling(): Node;
|
|
184
|
-
get nextElementSibling():
|
|
184
|
+
get nextElementSibling(): HTMLElement;
|
|
185
185
|
get classNames(): string;
|
|
186
186
|
}
|
|
187
187
|
export interface Options {
|
package/dist/nodes/html.js
CHANGED
|
@@ -299,7 +299,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
299
299
|
currentBlock.prependWhitespace = true;
|
|
300
300
|
}
|
|
301
301
|
else {
|
|
302
|
-
var text = node.
|
|
302
|
+
var text = node.trimmedText;
|
|
303
303
|
if (currentBlock.prependWhitespace) {
|
|
304
304
|
text = " " + text;
|
|
305
305
|
currentBlock.prependWhitespace = false;
|
|
@@ -311,7 +311,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
311
311
|
dfs(this);
|
|
312
312
|
return blocks.map(function (block) {
|
|
313
313
|
// Normalize each line's whitespace
|
|
314
|
-
return block.join('').
|
|
314
|
+
return block.join('').replace(/\s{2,}/g, ' ');
|
|
315
315
|
})
|
|
316
316
|
.join('\n').replace(/\s+$/, ''); // trimRight;
|
|
317
317
|
},
|
|
@@ -454,7 +454,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
454
454
|
if (node.isWhitespace) {
|
|
455
455
|
return;
|
|
456
456
|
}
|
|
457
|
-
node.rawText = node.
|
|
457
|
+
node.rawText = node.trimmedText;
|
|
458
458
|
}
|
|
459
459
|
else if (node.nodeType === type_1.default.ELEMENT_NODE) {
|
|
460
460
|
node.removeWhitespace();
|
package/dist/nodes/text.d.ts
CHANGED
|
@@ -13,6 +13,11 @@ export default class TextNode extends Node {
|
|
|
13
13
|
* @type {Number}
|
|
14
14
|
*/
|
|
15
15
|
nodeType: NodeType;
|
|
16
|
+
private _trimmedText?;
|
|
17
|
+
/**
|
|
18
|
+
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
19
|
+
*/
|
|
20
|
+
get trimmedText(): string;
|
|
16
21
|
/**
|
|
17
22
|
* Get unescaped text value of current node and its children.
|
|
18
23
|
* @return {string} text content
|
package/dist/nodes/text.js
CHANGED
|
@@ -36,6 +36,45 @@ var TextNode = /** @class */ (function (_super) {
|
|
|
36
36
|
_this.nodeType = type_1.default.TEXT_NODE;
|
|
37
37
|
return _this;
|
|
38
38
|
}
|
|
39
|
+
Object.defineProperty(TextNode.prototype, "trimmedText", {
|
|
40
|
+
/**
|
|
41
|
+
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
42
|
+
*/
|
|
43
|
+
get: function () {
|
|
44
|
+
if (this._trimmedText !== undefined)
|
|
45
|
+
return this._trimmedText;
|
|
46
|
+
var text = this.rawText;
|
|
47
|
+
var i = 0;
|
|
48
|
+
var startPos;
|
|
49
|
+
var endPos;
|
|
50
|
+
while (i >= 0 && i < text.length) {
|
|
51
|
+
if (/\S/.test(text[i])) {
|
|
52
|
+
if (startPos === undefined) {
|
|
53
|
+
startPos = i;
|
|
54
|
+
i = text.length;
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
endPos = i;
|
|
58
|
+
i = void 0;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
if (startPos === undefined)
|
|
62
|
+
i++;
|
|
63
|
+
else
|
|
64
|
+
i--;
|
|
65
|
+
}
|
|
66
|
+
if (startPos === undefined)
|
|
67
|
+
startPos = 0;
|
|
68
|
+
if (endPos === undefined)
|
|
69
|
+
endPos = text.length - 1;
|
|
70
|
+
var hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
|
|
71
|
+
var hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
|
|
72
|
+
this._trimmedText = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
|
|
73
|
+
return this._trimmedText;
|
|
74
|
+
},
|
|
75
|
+
enumerable: false,
|
|
76
|
+
configurable: true
|
|
77
|
+
});
|
|
39
78
|
Object.defineProperty(TextNode.prototype, "text", {
|
|
40
79
|
/**
|
|
41
80
|
* Get unescaped text value of current node and its children.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-html-parser",
|
|
3
|
-
"version": "3.3.1",
|
|
3
|
+
"version": "3.3.5",
|
|
4
4
|
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/esm/index.js",
|
|
@@ -15,7 +15,8 @@
|
|
|
15
15
|
"build": "npm run lint && npm run clean && npm run ts:cjs && npm run ts:amd && npm run ts:esm",
|
|
16
16
|
"dev": "tsc -w & mocha -w ./test/*.js",
|
|
17
17
|
"pretest": "tsc -m commonjs",
|
|
18
|
-
"release": "yarn build && np"
|
|
18
|
+
"release": "yarn build && np",
|
|
19
|
+
"prepare": "npm run build"
|
|
19
20
|
},
|
|
20
21
|
"keywords": [
|
|
21
22
|
"parser",
|
|
@@ -32,7 +33,7 @@
|
|
|
32
33
|
"registry": "https://registry.npmjs.org"
|
|
33
34
|
},
|
|
34
35
|
"dependencies": {
|
|
35
|
-
"css-select": "^
|
|
36
|
+
"css-select": "^4.1.3",
|
|
36
37
|
"he": "1.2.0"
|
|
37
38
|
},
|
|
38
39
|
"devDependencies": {
|