ghostwriter 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +76 -15
- data/RELEASE_NOTES.md +14 -0
- data/dirt-textify.gemspec +4 -2
- data/lib/ghostwriter/version.rb +1 -1
- data/lib/ghostwriter/writer.rb +5 -9
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2868e207e695355f8e9f40521b38d1f96b72b45c9ab73207396a87e5b4d535cd
|
4
|
+
data.tar.gz: cf111d734daa4bf94e4d9c2924dbd6c7d12b38b2b26a55db79110730f306fccc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d71989a44d8d2da33496172c600ec38063100c53642850982a9a93ccefbea37e40847e93edfde09d3b0f0dad98f457296f01c070387d86b11cc06e2ee9e04c1
|
7
|
+
data.tar.gz: 0767f0d24a895477aee922bd960380608185b741c0d87975bba65eaa03270539e2af2776bbcef2f689b604b76fde7882dbe9322e522b190858c2699359ce8a3b
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Ghostwriter
|
2
2
|
|
3
|
-
|
3
|
+
A Ruby gem that converts HTML to plain text, preserving as much legibility and functionality as possible.
|
4
4
|
|
5
5
|
It's sort of like a reverse-markdown or a very, very simple screen reader.
|
6
6
|
|
@@ -8,8 +8,8 @@ It's sort of like a reverse-markdown or a very, very simple screen reader.
|
|
8
8
|
|
9
9
|
* Some email clients won't or can't handle HTML at all
|
10
10
|
* Some people explicitly choose plaintext just by preference or accessibility
|
11
|
-
* Spam filters tend to
|
12
|
-
|
11
|
+
* Spam filters tend to prefer emails with a plain text alternative (but if you use this gem to spam people, I will yell
|
12
|
+
at you)
|
13
13
|
|
14
14
|
## Installation
|
15
15
|
|
@@ -29,20 +29,46 @@ Or install it manually with:
|
|
29
29
|
|
30
30
|
## Usage
|
31
31
|
|
32
|
-
Create a `Ghostwriter::Writer` and call `#textify` with the html you want modified:
|
32
|
+
Create a `Ghostwriter::Writer` and call `#textify` with the html string you want modified:
|
33
33
|
|
34
34
|
```ruby
|
35
|
-
html =
|
35
|
+
html = <<~HTML
|
36
|
+
<html>
|
37
|
+
<body>
|
38
|
+
<p>This is some text with <a href="tenjin.ca">a link</a></p>
|
39
|
+
<p>It handles other stuff, too.</p>
|
40
|
+
<hr>
|
41
|
+
<h1>Stuff Like</h1>
|
42
|
+
<ul>
|
43
|
+
<li>Images</li>
|
44
|
+
<li>Lists</li>
|
45
|
+
<li>Tables</li>
|
46
|
+
<li>And more</li>
|
47
|
+
</ul>
|
48
|
+
</body>
|
49
|
+
</html>
|
50
|
+
HTML
|
36
51
|
|
37
|
-
Ghostwriter::Writer.new
|
52
|
+
ghostwriter = Ghostwriter::Writer.new
|
53
|
+
|
54
|
+
puts ghostwriter.textify(html)
|
38
55
|
```
|
39
56
|
|
40
57
|
Produces:
|
41
58
|
|
42
59
|
```
|
43
|
-
This is some
|
60
|
+
This is some text with a link (tenjin.ca)
|
61
|
+
|
62
|
+
It handles other stuff, too.
|
63
|
+
|
44
64
|
|
45
|
-
|
65
|
+
----------
|
66
|
+
|
67
|
+
-- Stuff Like --
|
68
|
+
- Images
|
69
|
+
- Lists
|
70
|
+
- Tables
|
71
|
+
- And more
|
46
72
|
```
|
47
73
|
|
48
74
|
### Links
|
@@ -50,12 +76,7 @@ Other tags translate, too
|
|
50
76
|
Links are converted to the link text followed by the link target in parentheses:
|
51
77
|
|
52
78
|
```html
|
53
|
-
|
54
|
-
<html>
|
55
|
-
<body>
|
56
79
|
Visit our <a href="https://example.com">Website</a>
|
57
|
-
<body>
|
58
|
-
</html>
|
59
80
|
```
|
60
81
|
|
61
82
|
Becomes:
|
@@ -85,7 +106,7 @@ Use the base tag to <a href="/contact">expand</a> links.
|
|
85
106
|
Becomes:
|
86
107
|
|
87
108
|
```
|
88
|
-
Use the base tag to expand (https://www.example.com/contact) links
|
109
|
+
Use the base tag to expand (https://www.example.com/contact) links.
|
89
110
|
```
|
90
111
|
|
91
112
|
Or you can use the `link_base` configuration:
|
@@ -119,7 +140,9 @@ But images lacking alt text or with a presentation ARIA role are ignored:
|
|
119
140
|
And images with data URIs won't include the data portion.
|
120
141
|
|
121
142
|
```html
|
122
|
-
|
143
|
+
|
144
|
+
<img src="data:image/gif;base64,R0lGODdhIwAjAMZ/AAkMBxETEBUUDBoaExkaGCIcFx4fGCEfFCcfECkjHiUlHiglGikmFjAqFi8pJCsrJT8sCjMzLDUzJzs0GjkzLTszKTM1Mzg4MD48Mzs+O0tAIElCJ1NCGVdBHUtEMkNFQjlHTFJDOkdGPT1ISUxLRENOT1tMI01PTGdLKk1RU0hTVEtTT0NVVFRTTExYWE9YVGhVP1VZXGFYTWhaMFRcWHFYL1FdXV1dRHdZMVRgYFhgXFdiY11hY1tkX31hJltmZ2pnWnloLGFrbG9oYXlqN3NqTnBqWHxqRItvRIh0Nod0ToF2U5J4LX55Xm97e4B5aZqAQpGAdqOCOZKEYZ2FOJyEVoyKbqiOXpySbLCVcLCXaKWbdKCdfZyhi66dksGdc76fbbije7mkdLOmgq6ogrCpibyvirexisWvhs2vgsGyiLq1lce1lMC5ks28nsfBmcHDq9bAl9PDmMnFo9TGh8zIoM7Jm9vLs9nRo93QqtfSquLQpdXUs+fdterlw////ywAAAAAIwAjAAAH/oArOTo6PYaGOz08P0KMOTZCOzw7PzY/Pz2JPYSDhTSFPTSXPY0tIiIfJz05o5Q/O7A5moc6O4Q0oS8uQisXGCItwTItP5OxOrKjhzSfLzYvgz85ERQXJKcSIkZeJDqOl43StrSEKzo2LhkOGBISDw40JyIVFVEyorBCkZmwtCsrtnLQSJCAwoMFCiwoiECPAr0TjPrtECJwXLMVNARlUCBhQAEFC2SsgWPGDBs3d2RcorSD1SVGr3qskOkihoIH70DO0cOHDx48evD0KQONmQ0aORZJE3VLRYoPBRwoUCCCSx07eoL+xLNnj5UfNFry4BHuR6EcK0qkKJFhAYUE/g+cdHlz1efPrnvM2MjhQlYOWTxktXThIoUKhQoKDHBi5Y0dO0CD5smzJ46NvWJfjYW1w4WKEiWkKkgw9UYdPXTo8Mn6042bvX9pTHoFa5GKzykekP5owEidN1u6PKnzMw+QJ3ttUPr7qKUs0C5KHOyoAMMaNWrmjKlSRYscMFm+nBBUybkLSYsIl3DxwAgcKwWMzGnz5kqTK1e09AEDI0uGE8rJEgNfsuxVggoujGABF1xMoYAVc9RRhxxq5JGVHn3EEYcIGfT1igvGKLfDZyWMkMINa5QhQRNz9CQhT1n5URmHJ8Sygw2BSWLDbaCpgEFPNzxBV4QwApVhHBhg/vABZ0pJIhuCoI0wQhFlkLEGGWfQ9wZ2W6KRBhoUJKncKyK2tMOBPI6wwAxltInlG1uKcQUUV3xpwQUXACSJjbCAxgJoJShggBVtnmGGlm/M4UYcX14QQQQ1PpJjUjmsd5sKCg5gBRdkYMlGG2KwoUYWWYARxgXVnODXqmP9CWgJIESwxhJTbEHGGGbMsSWpaRRBQQQXpPKIiJOgg+BnI4AwwhxcHFHrGGN0KYYYaEhAzQX/7flIDMqx4CoIJY7QxhpY0GorXXXwkUcRj1Lg7gfMDavcCSx4BqsIHpyxRhtT1FCDEmNgF4YY1j6KZ4eXXTast9GVcAIHG2TZRhlT/qCAAg5IZIzCA+1QQ0EGKbgAG7c0pPOAAgQcwEQSZ2R5RhlYVIFEFVccAQEAAASgWEIrXEZYDDHQYAEBAQSAcxBUbCExGWVsMfMVCHSA89QCbHBDX4QRRsPURuMcQBBQYLHGHGuwoYUYVdQQxAIOBCCACVLUgDMBS7rwwgtENHDAAEYLMIAAHhABRRVYKFEDDjjU0AA9HiQhxQQOCDC1BXe/UAQVVATRwAIDDGCAAAd0EAQTTEgBBQ4IIFSBFHFPdYEIFJBAQOUE1K5AAyZgnsQME/jNwAG/e7QBFT4sYEABBiQv6ANDDLDCCwPULr0ADYyeOQcMLMAAAxNAIQUHJwckYEDn5CfvgAEKvECA3+R7nrwB2k+ggQkmaLB3++Sz3zkMIawQCAA7"
|
145
|
+
alt="Data picture" />
|
123
146
|
```
|
124
147
|
|
125
148
|
Becomes:
|
@@ -128,6 +151,44 @@ Becomes:
|
|
128
151
|
Data picture (embedded)
|
129
152
|
```
|
130
153
|
|
154
|
+
### Paragraphs and Linebreaks
|
155
|
+
|
156
|
+
Paragraphs are padded with a newline at the end. Line break tags add an empty line.
|
157
|
+
|
158
|
+
```html
|
159
|
+
<p>I would like to propose a toast.</p>
|
160
|
+
<p>This meal we enjoy together would be improved by one.</p>
|
161
|
+
<br />
|
162
|
+
<p>... Plug in the toaster and I'll get the bread.</p>
|
163
|
+
```
|
164
|
+
|
165
|
+
```
|
166
|
+
I would like to propose a toast.
|
167
|
+
|
168
|
+
This meal we enjoy together would be improved by one.
|
169
|
+
|
170
|
+
|
171
|
+
... Plug in the toaster and I'll get the bread.
|
172
|
+
|
173
|
+
```
|
174
|
+
|
175
|
+
### Headers
|
176
|
+
|
177
|
+
For now, headers are all treated the same and given a simple marker:
|
178
|
+
|
179
|
+
```html
|
180
|
+
<h1>Dog Maintenance and Repair</h1>
|
181
|
+
<h2>Food Input Port</h2>
|
182
|
+
<h3>Exhaust Port Considerations</h3>
|
183
|
+
```
|
184
|
+
|
185
|
+
Becomes:
|
186
|
+
|
187
|
+
```
|
188
|
+
-- Dog Maintenance and Repair --
|
189
|
+
-- Food Input Port --
|
190
|
+
-- Exhaust Port Considerations --
|
191
|
+
```
|
131
192
|
|
132
193
|
### Lists
|
133
194
|
|
data/RELEASE_NOTES.md
CHANGED
data/dirt-textify.gemspec
CHANGED
@@ -10,9 +10,11 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.authors = ['Robin Miller']
|
11
11
|
spec.email = ['robin@tenjin.ca']
|
12
12
|
|
13
|
-
spec.summary = '
|
13
|
+
spec.summary = 'Converts HTML to plain text'
|
14
14
|
spec.description = <<~DESC
|
15
|
-
|
15
|
+
Converts HTML to plain text, preserving as much legibility and functionality as possible.
|
16
|
+
|
17
|
+
Ideal for providing a plaintext multipart segment of email messages.
|
16
18
|
DESC
|
17
19
|
spec.homepage = 'https://github.com/TenjinInc/ghostwriter'
|
18
20
|
spec.license = 'MIT'
|
data/lib/ghostwriter/version.rb
CHANGED
data/lib/ghostwriter/writer.rb
CHANGED
@@ -17,7 +17,7 @@ module Ghostwriter
|
|
17
17
|
#
|
18
18
|
# @return converted text
|
19
19
|
def textify(html)
|
20
|
-
doc = Nokogiri::HTML(
|
20
|
+
doc = Nokogiri::HTML(html.gsub(/\s+/, ' '))
|
21
21
|
|
22
22
|
doc.search('style, script').remove
|
23
23
|
|
@@ -30,15 +30,11 @@ module Ghostwriter
|
|
30
30
|
replace_lists(doc)
|
31
31
|
replace_tables(doc)
|
32
32
|
|
33
|
-
simple_replace(doc, 'hr', "\n----------\n")
|
33
|
+
simple_replace(doc, 'hr', "\n----------\n\n")
|
34
34
|
simple_replace(doc, 'br', "\n")
|
35
|
+
simple_replace(doc, 'p', "\n\n")
|
35
36
|
|
36
|
-
|
37
|
-
# link_node.inner_html = link_node.inner_html + "\n\n"
|
38
|
-
# end
|
39
|
-
|
40
|
-
# trim, but only single-space character
|
41
|
-
doc.text.gsub(/^ +| +$/, '')
|
37
|
+
doc.text.strip.split("\n").collect(&:strip).join("\n").concat("\n")
|
42
38
|
end
|
43
39
|
|
44
40
|
private
|
@@ -112,7 +108,7 @@ module Ghostwriter
|
|
112
108
|
list_item.inner_html = "#{ marker } #{ list_item.inner_html }\n".squeeze(' ')
|
113
109
|
end
|
114
110
|
|
115
|
-
list_node.replace("
|
111
|
+
list_node.replace("#{ list_node.inner_html }\n")
|
116
112
|
end
|
117
113
|
end
|
118
114
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ghostwriter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robin Miller
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -94,9 +94,10 @@ dependencies:
|
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '1.10'
|
97
|
-
description:
|
97
|
+
description: |
|
98
|
+
Converts HTML to plain text, preserving as much legibility and functionality as possible.
|
98
99
|
|
99
|
-
|
100
|
+
Ideal for providing a plaintext multipart segment of email messages.
|
100
101
|
email:
|
101
102
|
- robin@tenjin.ca
|
102
103
|
executables: []
|
@@ -142,5 +143,5 @@ requirements: []
|
|
142
143
|
rubygems_version: 3.1.2
|
143
144
|
signing_key:
|
144
145
|
specification_version: 4
|
145
|
-
summary:
|
146
|
+
summary: Converts HTML to plain text
|
146
147
|
test_files: []
|