@tryghost/html-to-plaintext 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -0
- package/index.js +1 -0
- package/lib/html-to-plaintext.js +120 -0
- package/package.json +28 -0
package/README.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Html To Plaintext
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
## Usage
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
## Develop
|
|
8
|
+
|
|
9
|
+
This is a monorepo package.
|
|
10
|
+
|
|
11
|
+
Follow the instructions for the top-level repo.
|
|
12
|
+
1. `git clone` this repo & `cd` into it as usual
|
|
13
|
+
2. Run `yarn` to install top-level dependencies.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
## Test
|
|
18
|
+
|
|
19
|
+
- `yarn lint` runs only ESLint
|
|
20
|
+
- `yarn test` runs lint and tests
|
|
21
|
+
|
package/index.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Package entry point: re-export everything from the implementation in lib/
module.exports = require('./lib/html-to-plaintext');
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
const _ = require('lodash');
|
|
2
|
+
|
|
3
|
+
/**
 * Build a settings object by layering `extraSettings` on top of `baseSettings`.
 *
 * The merge is done into a fresh object with lodash's `mergeWith`. Whenever the
 * value already accumulated for a key is an array, the incoming value is
 * concatenated onto it; every other value falls through to lodash's default
 * merge behaviour.
 *
 * @param {Object} extraSettings - settings to layer over the base settings
 * @returns {Object} the merged settings object
 */
const mergeSettings = (extraSettings) => {
    const appendToArrays = (objValue, srcValue) => {
        // Returning undefined hands the decision back to lodash's default merge
        return _.isArray(objValue) ? objValue.concat(srcValue) : undefined;
    };

    return _.mergeWith({}, baseSettings, extraSettings, appendToArrays);
};
|
|
10
|
+
|
|
11
|
+
// Settings common to every converter
const baseSettings = {
    wordwrap: false,
    preserveNewlines: true,

    // equiv returnDomByDefault: true,
    baseElements: {returnDomByDefault: true},
    selectors: [
        // Ignore images, equiv ignoreImage: true
        {selector: 'img', format: 'skip'},

        // disable uppercase headings, equiv uppercaseHeadings: false
        ...['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].map(selector => ({
            selector,
            options: {uppercase: false}
        })),
        {selector: 'table', options: {uppercaseHeaderCells: false}},

        // Backwards compatibility with html-to-text 5.1.1
        {selector: 'div', format: 'inline'}
    ]
};
|
|
34
|
+
|
|
35
|
+
// Compiled html-to-text converters; all four are assigned by loadConverters()
let excerptConverter;
let emailConverter;
let commentConverter;
let commentSnippetConverter;

/**
 * Compile the excerpt, email, comment and comment-snippet converters.
 *
 * `html-to-text` is required inside this function, so it is only loaded the
 * first time a converter is needed. Once all four converters exist, further
 * calls return immediately.
 *
 * @returns {void}
 */
const loadConverters = () => {
    // Check every converter, not just the first two: if an earlier call threw
    // part-way through compiling, the next call must retry (and surface the
    // real error) rather than leave the remaining converters undefined.
    if (excerptConverter && emailConverter && commentConverter && commentSnippetConverter) {
        return;
    }

    const {compile} = require('html-to-text');

    const excerptSettings = mergeSettings({
        selectors: [
            {selector: 'a', options: {ignoreHref: true}},
            {selector: 'figcaption', format: 'skip'},
            // Strip inline and bottom footnotes
            {selector: 'a[rel=footnote]', format: 'skip'},
            {selector: 'div.footnotes', format: 'skip'},
            // Don't output hrs
            {selector: 'hr', format: 'skip'},
            // Don't output > in blockquotes
            {selector: 'blockquote', format: 'block'},
            // Don't include signup cards in excerpts
            {selector: '.kg-signup-card', format: 'skip'}
        ]
    });

    const emailSettings = mergeSettings({
        selectors: [
            // equiv hideLinkHrefIfSameAsText: true
            {selector: 'a', options: {hideLinkHrefIfSameAsText: true}},
            // Don't include html .preheader in email
            {selector: '.preheader', format: 'skip'}
        ]
    });

    const commentSettings = mergeSettings({
        preserveNewlines: false,
        selectors: [
            // equiv hideLinkHrefIfSameAsText: true
            {selector: 'a', options: {hideLinkHrefIfSameAsText: true}},
            // No space between <p> tags. An empty <p> is needed
            {selector: 'p', options: {leadingLineBreaks: 1, trailingLineBreaks: 1}}
        ]
    });

    const commentSnippetSettings = mergeSettings({
        preserveNewlines: false,
        selectors: [
            // equiv ignoreHref: true
            // Expressed on the `a` selector, as the excerpt settings do, rather
            // than as a legacy top-level flag.
            // NOTE(review): html-to-text 8 appears not to read a top-level
            // `ignoreHref` option - confirm against its changelog.
            {selector: 'a', options: {ignoreHref: true}},
            {selector: 'blockquote', format: 'skip'}
        ]
    });

    excerptConverter = compile(excerptSettings);
    emailConverter = compile(emailSettings);
    commentConverter = compile(commentSettings);
    commentSnippetConverter = compile(commentSnippetSettings);
};
|
|
95
|
+
|
|
96
|
+
/**
 * Convert HTML to plaintext with the excerpt converter.
 *
 * @param {string} html - markup to convert
 * @returns {string} the converter's plaintext output
 */
module.exports.excerpt = (html) => {
    // Converters are created on demand, so make sure they exist first
    loadConverters();

    const plaintext = excerptConverter(html);
    return plaintext;
};
|
|
101
|
+
|
|
102
|
+
/**
 * Convert HTML to plaintext with the email converter.
 *
 * @param {string} html - markup to convert
 * @returns {string} the converter's plaintext output
 */
module.exports.email = (html) => {
    // Converters are created on demand, so make sure they exist first
    loadConverters();

    const plaintext = emailConverter(html);
    return plaintext;
};
|
|
107
|
+
|
|
108
|
+
/**
 * Convert HTML to plaintext with the comment converter.
 *
 * @param {string} html - markup to convert
 * @returns {string} the converter's plaintext output
 */
module.exports.comment = (html) => {
    // Converters are created on demand, so make sure they exist first
    loadConverters();

    const plaintext = commentConverter(html);
    return plaintext;
};
|
|
113
|
+
|
|
114
|
+
/**
 * Convert HTML to a single-line plaintext snippet with the comment-snippet
 * converter.
 *
 * @param {string} html - markup to convert
 * @returns {string} the converted text with newlines replaced by spaces and
 *   each run of whitespace collapsed to one space
 */
module.exports.commentSnippet = (html) => {
    // Converters are created on demand, so make sure they exist first
    loadConverters();

    const plaintext = commentSnippetConverter(html);
    const singleLine = plaintext.replace(/\n/g, ' ');

    return singleLine.replace(/\s+/g, ' ');
};
|
package/package.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@tryghost/html-to-plaintext",
|
|
3
|
+
"version": "1.0.1",
|
|
4
|
+
"repository": "https://github.com/TryGhost/Ghost/tree/main/packages/html-to-plaintext",
|
|
5
|
+
"author": "Ghost Foundation",
|
|
6
|
+
"private": false,
|
|
7
|
+
"main": "index.js",
|
|
8
|
+
"files": [
|
|
9
|
+
"index.js",
|
|
10
|
+
"lib"
|
|
11
|
+
],
|
|
12
|
+
"scripts": {
|
|
13
|
+
"dev": "echo \"Implement me!\"",
|
|
14
|
+
"test:unit": "NODE_ENV=testing c8 --all --check-coverage --reporter text --reporter cobertura -- mocha --reporter dot './test/**/*.test.js'",
|
|
15
|
+
"test": "yarn test:unit",
|
|
16
|
+
"lint:code": "eslint *.js lib/ --ext .js --cache",
|
|
17
|
+
"lint": "yarn lint:code && yarn lint:test",
|
|
18
|
+
"lint:test": "eslint -c test/.eslintrc.js test/ --ext .js --cache"
|
|
19
|
+
},
|
|
20
|
+
"devDependencies": {
|
|
21
|
+
"c8": "8.0.1",
|
|
22
|
+
"mocha": "10.8.2"
|
|
23
|
+
},
|
|
24
|
+
"dependencies": {
|
|
25
|
+
"html-to-text": "8.2.1",
|
|
26
|
+
"lodash": "4.17.21"
|
|
27
|
+
}
|
|
28
|
+
}
|