cheerio-to-text 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAW,OAAO,EAAE,MAAM,SAAS,CAAA;AAa9E,wBAAgB,MAAM,CACpB,IAAI,EAAE,UAAU,GAAG,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,GAChE,MAAM,CA8BR"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAW,OAAO,EAAE,MAAM,SAAS,CAAA;AAc9E,wBAAgB,MAAM,CACpB,IAAI,EAAE,UAAU,GAAG,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,GAChE,MAAM,CAyCR"}
package/lib/esm/index.js CHANGED
@@ -6,6 +6,7 @@ const inlineElements = new Set(`a,abbr,acronym,audio,b,bdi,bdo,big,br,button,can
6
6
  tt,u,var,video,wbr`
7
7
  .split(",")
8
8
  .map((s) => s.trim()));
9
+ const isBlockTag = (tagName) => !inlineElements.has(tagName);
9
10
  export function render(node) {
10
11
  let root = null;
11
12
  if (typeof node === "string") {
@@ -23,24 +24,31 @@ export function render(node) {
23
24
  let text = "";
24
25
  function enter(element) {
25
26
  if (element.type === "text") {
26
- if (element.data.trim())
27
- text += element.data;
27
+ text += element.data;
28
28
  }
29
- else if (element.type === "tag") {
30
- if (!inlineElements.has(element.tagName)) {
31
- if (text.at(-1) !== "\n")
32
- text += "\n";
29
+ }
30
+ function leave(element) {
31
+ if (element.type === "tag") {
32
+ // console.log({ LEAVING: element.type, tagName: element.tagName })
33
+ if (isBlockTag(element.tagName)) {
34
+ text += "\n";
33
35
  }
34
36
  }
35
37
  }
36
- walk(root, enter);
37
- return text.trim();
38
+ walk(root, enter, leave);
39
+ return text
40
+ .trim()
41
+ .split(/\n+/g)
42
+ .map((line) => line.trim())
43
+ .filter(Boolean)
44
+ .join("\n");
38
45
  }
39
- function walk(root, enter) {
46
+ function walk(root, enter, leave) {
40
47
  enter(root);
41
48
  if (root.type === "tag") {
42
49
  for (const child of root.children) {
43
- walk(child, enter);
50
+ walk(child, enter, leave);
44
51
  }
45
52
  }
53
+ leave(root);
46
54
  }
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAW,OAAO,EAAE,MAAM,SAAS,CAAA;AAa9E,wBAAgB,MAAM,CACpB,IAAI,EAAE,UAAU,GAAG,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,GAChE,MAAM,CA8BR"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAW,OAAO,EAAE,MAAM,SAAS,CAAA;AAc9E,wBAAgB,MAAM,CACpB,IAAI,EAAE,UAAU,GAAG,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,GAChE,MAAM,CAyCR"}
package/lib/index.js CHANGED
@@ -19,6 +19,7 @@ var cheerio_1 = __importDefault(require("cheerio"));
19
19
  var inlineElements = new Set("a,abbr,acronym,audio,b,bdi,bdo,big,br,button,canvas,cite,code,data,\n datalist,del,dfn,em,embed,i,iframe,img,input,ins,kbd,label,map,mark,\n meter,noscript,object,output,picture,progress,q,ruby,s,samp,script,\n select,slot,small,span,strong,sub,sup,svg,template,textarea,time,\n tt,u,var,video,wbr"
20
20
  .split(",")
21
21
  .map(function (s) { return s.trim(); }));
22
+ var isBlockTag = function (tagName) { return !inlineElements.has(tagName); };
22
23
  function render(node) {
23
24
  var root = null;
24
25
  if (typeof node === "string") {
@@ -36,28 +37,34 @@ function render(node) {
36
37
  var text = "";
37
38
  function enter(element) {
38
39
  if (element.type === "text") {
39
- if (element.data.trim())
40
- text += element.data;
40
+ text += element.data;
41
41
  }
42
- else if (element.type === "tag") {
43
- if (!inlineElements.has(element.tagName)) {
44
- if (text.at(-1) !== "\n")
45
- text += "\n";
42
+ }
43
+ function leave(element) {
44
+ if (element.type === "tag") {
45
+ // console.log({ LEAVING: element.type, tagName: element.tagName })
46
+ if (isBlockTag(element.tagName)) {
47
+ text += "\n";
46
48
  }
47
49
  }
48
50
  }
49
- walk(root, enter);
50
- return text.trim();
51
+ walk(root, enter, leave);
52
+ return text
53
+ .trim()
54
+ .split(/\n+/g)
55
+ .map(function (line) { return line.trim(); })
56
+ .filter(Boolean)
57
+ .join("\n");
51
58
  }
52
59
  exports.render = render;
53
- function walk(root, enter) {
60
+ function walk(root, enter, leave) {
54
61
  var e_1, _a;
55
62
  enter(root);
56
63
  if (root.type === "tag") {
57
64
  try {
58
65
  for (var _b = __values(root.children), _c = _b.next(); !_c.done; _c = _b.next()) {
59
66
  var child = _c.value;
60
- walk(child, enter);
67
+ walk(child, enter, leave);
61
68
  }
62
69
  }
63
70
  catch (e_1_1) { e_1 = { error: e_1_1 }; }
@@ -68,4 +75,5 @@ function walk(root, enter) {
68
75
  finally { if (e_1) throw e_1.error; }
69
76
  }
70
77
  }
78
+ leave(root);
71
79
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cheerio-to-text",
3
- "version": "0.1.2",
3
+ "version": "0.2.0",
4
4
  "description": "Turn a Cheerio object into plain text",
5
5
  "repository": {
6
6
  "type": "git",