cheerio-to-text 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/esm/index.d.ts.map +1 -1
- package/lib/esm/index.js +18 -10
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +18 -10
- package/package.json +1 -1
package/lib/esm/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAW,OAAO,EAAE,MAAM,SAAS,CAAA;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAW,OAAO,EAAE,MAAM,SAAS,CAAA;AAc9E,wBAAgB,MAAM,CACpB,IAAI,EAAE,UAAU,GAAG,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,GAChE,MAAM,CAyCR"}
|
package/lib/esm/index.js
CHANGED
|
@@ -6,6 +6,7 @@ const inlineElements = new Set(`a,abbr,acronym,audio,b,bdi,bdo,big,br,button,can
|
|
|
6
6
|
tt,u,var,video,wbr`
|
|
7
7
|
.split(",")
|
|
8
8
|
.map((s) => s.trim()));
|
|
9
|
+
const isBlockTag = (tagName) => !inlineElements.has(tagName);
|
|
9
10
|
export function render(node) {
|
|
10
11
|
let root = null;
|
|
11
12
|
if (typeof node === "string") {
|
|
@@ -23,24 +24,31 @@ export function render(node) {
|
|
|
23
24
|
let text = "";
|
|
24
25
|
function enter(element) {
|
|
25
26
|
if (element.type === "text") {
|
|
26
|
-
|
|
27
|
-
text += element.data;
|
|
27
|
+
text += element.data;
|
|
28
28
|
}
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
29
|
+
}
|
|
30
|
+
function leave(element) {
|
|
31
|
+
if (element.type === "tag") {
|
|
32
|
+
// console.log({ LEAVING: element.type, tagName: element.tagName })
|
|
33
|
+
if (isBlockTag(element.tagName)) {
|
|
34
|
+
text += "\n";
|
|
33
35
|
}
|
|
34
36
|
}
|
|
35
37
|
}
|
|
36
|
-
walk(root, enter);
|
|
37
|
-
return text
|
|
38
|
+
walk(root, enter, leave);
|
|
39
|
+
return text
|
|
40
|
+
.trim()
|
|
41
|
+
.split(/\n+/g)
|
|
42
|
+
.map((line) => line.trim())
|
|
43
|
+
.filter(Boolean)
|
|
44
|
+
.join("\n");
|
|
38
45
|
}
|
|
39
|
-
function walk(root, enter) {
|
|
46
|
+
function walk(root, enter, leave) {
|
|
40
47
|
enter(root);
|
|
41
48
|
if (root.type === "tag") {
|
|
42
49
|
for (const child of root.children) {
|
|
43
|
-
walk(child, enter);
|
|
50
|
+
walk(child, enter, leave);
|
|
44
51
|
}
|
|
45
52
|
}
|
|
53
|
+
leave(root);
|
|
46
54
|
}
|
package/lib/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAW,OAAO,EAAE,MAAM,SAAS,CAAA;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAW,OAAO,EAAE,MAAM,SAAS,CAAA;AAc9E,wBAAgB,MAAM,CACpB,IAAI,EAAE,UAAU,GAAG,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,GAChE,MAAM,CAyCR"}
|
package/lib/index.js
CHANGED
|
@@ -19,6 +19,7 @@ var cheerio_1 = __importDefault(require("cheerio"));
|
|
|
19
19
|
var inlineElements = new Set("a,abbr,acronym,audio,b,bdi,bdo,big,br,button,canvas,cite,code,data,\n datalist,del,dfn,em,embed,i,iframe,img,input,ins,kbd,label,map,mark,\n meter,noscript,object,output,picture,progress,q,ruby,s,samp,script,\n select,slot,small,span,strong,sub,sup,svg,template,textarea,time,\n tt,u,var,video,wbr"
|
|
20
20
|
.split(",")
|
|
21
21
|
.map(function (s) { return s.trim(); }));
|
|
22
|
+
var isBlockTag = function (tagName) { return !inlineElements.has(tagName); };
|
|
22
23
|
function render(node) {
|
|
23
24
|
var root = null;
|
|
24
25
|
if (typeof node === "string") {
|
|
@@ -36,28 +37,34 @@ function render(node) {
|
|
|
36
37
|
var text = "";
|
|
37
38
|
function enter(element) {
|
|
38
39
|
if (element.type === "text") {
|
|
39
|
-
|
|
40
|
-
text += element.data;
|
|
40
|
+
text += element.data;
|
|
41
41
|
}
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
42
|
+
}
|
|
43
|
+
function leave(element) {
|
|
44
|
+
if (element.type === "tag") {
|
|
45
|
+
// console.log({ LEAVING: element.type, tagName: element.tagName })
|
|
46
|
+
if (isBlockTag(element.tagName)) {
|
|
47
|
+
text += "\n";
|
|
46
48
|
}
|
|
47
49
|
}
|
|
48
50
|
}
|
|
49
|
-
walk(root, enter);
|
|
50
|
-
return text
|
|
51
|
+
walk(root, enter, leave);
|
|
52
|
+
return text
|
|
53
|
+
.trim()
|
|
54
|
+
.split(/\n+/g)
|
|
55
|
+
.map(function (line) { return line.trim(); })
|
|
56
|
+
.filter(Boolean)
|
|
57
|
+
.join("\n");
|
|
51
58
|
}
|
|
52
59
|
exports.render = render;
|
|
53
|
-
function walk(root, enter) {
|
|
60
|
+
function walk(root, enter, leave) {
|
|
54
61
|
var e_1, _a;
|
|
55
62
|
enter(root);
|
|
56
63
|
if (root.type === "tag") {
|
|
57
64
|
try {
|
|
58
65
|
for (var _b = __values(root.children), _c = _b.next(); !_c.done; _c = _b.next()) {
|
|
59
66
|
var child = _c.value;
|
|
60
|
-
walk(child, enter);
|
|
67
|
+
walk(child, enter, leave);
|
|
61
68
|
}
|
|
62
69
|
}
|
|
63
70
|
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
@@ -68,4 +75,5 @@ function walk(root, enter) {
|
|
|
68
75
|
finally { if (e_1) throw e_1.error; }
|
|
69
76
|
}
|
|
70
77
|
}
|
|
78
|
+
leave(root);
|
|
71
79
|
}
|