html2text 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +70 -0
- data/lib/html2text.rb +138 -0
- data/lib/html2text/version.rb +3 -0
- data/spec/examples/anchors.html +12 -0
- data/spec/examples/anchors.txt +5 -0
- data/spec/examples/basic.html +21 -0
- data/spec/examples/basic.txt +13 -0
- data/spec/examples/lists.html +24 -0
- data/spec/examples/lists.txt +17 -0
- data/spec/examples/more-anchors.html +14 -0
- data/spec/examples/more-anchors.txt +7 -0
- data/spec/examples/nbsp.html +1 -0
- data/spec/examples/nbsp.txt +1 -0
- data/spec/examples/table.html +53 -0
- data/spec/examples/table.txt +7 -0
- data/spec/examples/test3.html +1 -0
- data/spec/examples/test3.txt +2 -0
- data/spec/examples/test4.html +1 -0
- data/spec/examples/test4.txt +5 -0
- data/spec/examples_spec.rb +25 -0
- data/spec/html2text_spec.rb +37 -0
- data/spec/spec_helper.rb +4 -0
- metadata +156 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7c84c460e75e64099fa12a010871f9859ab48b9f
|
4
|
+
data.tar.gz: ea56a52568f22804cdcbc44b5f35e6b99164ea6c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a3833c4546b86912872d777fc57be15cc0fac89e273e5ad65b6714a0b723f4815a81a3865e9ee0b05746ef7dee356baf5824ace242ab914d26eb79bf3aa6bf65
|
7
|
+
data.tar.gz: 737d869f81c782f93d520e935bb5b26a0a88798f940b60856519a084eabd1dfca84171d673f3abd5e73ecf0f84917909573cd6d92a67510fcdfcc075c4a676ed
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright 2015 Jevon Wright
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
html2text [![Build Status](https://travis-ci.org/soundasleep/html2text_ruby.svg?branch=master)](https://travis-ci.org/soundasleep/html2text_ruby)
|
2
|
+
==============
|
3
|
+
|
4
|
+
`html2text` is a very simple library that uses [Nokogiri](http://www.nokogiri.org/) to parse HTML from a string into a DOM, and then iterates over the resulting DOM to output plain text. For example:
|
5
|
+
|
6
|
+
```html
|
7
|
+
<html>
|
8
|
+
<title>Ignored Title</title>
|
9
|
+
<body>
|
10
|
+
<h1>Hello, World!</h1>
|
11
|
+
|
12
|
+
<p>This is some e-mail content.
|
13
|
+
Even though it has whitespace and newlines, the e-mail converter
|
14
|
+
will handle it correctly.
|
15
|
+
|
16
|
+
<p>Even mismatched tags.</p>
|
17
|
+
|
18
|
+
<div>A div</div>
|
19
|
+
<div>Another div</div>
|
20
|
+
<div>A div<div>within a div</div></div>
|
21
|
+
|
22
|
+
<a href="http://foo.com">A link</a>
|
23
|
+
|
24
|
+
</body>
|
25
|
+
</html>
|
26
|
+
```
|
27
|
+
|
28
|
+
Will be converted into:
|
29
|
+
|
30
|
+
```text
|
31
|
+
Hello, World!
|
32
|
+
|
33
|
+
This is some e-mail content. Even though it has whitespace and newlines, the e-mail converter will handle it correctly.
|
34
|
+
|
35
|
+
Even mismatched tags.
|
36
|
+
A div
|
37
|
+
Another div
|
38
|
+
A div
|
39
|
+
within a div
|
40
|
+
[A link](http://foo.com)
|
41
|
+
```
|
42
|
+
|
43
|
+
See the [original blog post](http://journals.jevon.org/users/jevon-phd/entry/19818) or the related [StackOverflow answer](http://stackoverflow.com/a/2564472/39531).
|
44
|
+
|
45
|
+
## Installing
|
46
|
+
|
47
|
+
Install the gem (`gem install html2text`, or add `gem 'html2text'` to your Gemfile), then you can:
|
48
|
+
|
49
|
+
```ruby
|
50
|
+
require 'html2text'
|
51
|
+
|
52
|
+
text = Html2Text.convert(html)
|
53
|
+
```
|
54
|
+
|
55
|
+
## Tests
|
56
|
+
|
57
|
+
See all of the test cases defined in [spec/examples/](spec/examples/). These can be run with:
|
58
|
+
|
59
|
+
```
|
60
|
+
bundle install
|
61
|
+
rspec
|
62
|
+
```
|
63
|
+
|
64
|
+
## License
|
65
|
+
|
66
|
+
`html2text` is licensed under MIT.
|
67
|
+
|
68
|
+
## Other versions
|
69
|
+
|
70
|
+
Also see [html2text](https://github.com/soundasleep/html2text), the original PHP implementation.
|
data/lib/html2text.rb
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
# Converts a parsed HTML document into a plain-text approximation of how the
# document reads: block elements become line breaks, list items get a "- "
# prefix, table cells are tab-separated and links are rendered as
# "[text](href)".
#
# Typical use is the class-level entry point:
#
#   text = Html2Text.convert("<p>Hello</p>")
class Html2Text
  # Elements whose entire subtree is dropped from the output.
  IGNORED_ELEMENTS = %w[style head title meta script].freeze

  # Heading elements; they get a blank line before and after.
  HEADING_ELEMENTS = %w[h1 h2 h3 h4 h5 h6].freeze

  # The parsed document (or node) this converter walks.
  attr_reader :doc

  # @param doc the root node to convert; it is only used through the node
  #   methods called below (name, text?, text, children, next_sibling,
  #   element?, attribute).
  def initialize(doc)
    @doc = doc
  end

  # Convert an HTML string to plain text.
  #
  # @param html [String] the HTML source
  # @return [String] the plain-text rendering, stripped of surrounding whitespace
  def self.convert(html)
    html = fix_newlines(replace_entities(html))
    doc = Nokogiri::HTML(html)

    Html2Text.new(doc).convert
  end

  # Normalise Windows (CRLF) and old Mac (CR) line endings to "\n".
  #
  # @param text [String]
  # @return [String]
  def self.fix_newlines(text)
    text.gsub("\r\n", "\n").gsub("\r", "\n")
  end

  # Replace non-breaking spaces with ordinary spaces so they take part in
  # whitespace collapsing. Both the literal "&nbsp;" entity and the U+00A0
  # character are handled. The entity match is deliberately case-sensitive
  # ("&NBSP;" is left alone).
  #
  # @param text [String]
  # @return [String]
  def self.replace_entities(text)
    text.gsub("&nbsp;", " ").gsub("\u00a0", " ")
  end

  # Walk the document and return the final, stripped text.
  #
  # @return [String]
  def convert
    output = iterate_over(doc)
    output = remove_leading_and_trailing_whitespace(output)
    output.strip
  end

  # Strip spaces and tabs that sit directly before or after a newline, so
  # every output line is trimmed.
  #
  # @param text [String]
  # @return [String]
  def remove_leading_and_trailing_whitespace(text)
    text.gsub(/[ \t]*\n[ \t]*/, "\n")
  end

  # Collapse each run of whitespace (tab, newline, form feed, carriage
  # return, space) into a single space.
  #
  # @param text [String]
  # @return [String]
  def trimmed_whitespace(text)
    text.gsub(/[\t\n\f\r ]+/, " ")
  end

  # Lower-cased name of the next sibling that is an element, skipping any
  # non-element siblings in between.
  #
  # @return [String, nil] nil when no element sibling follows
  def next_node_name(node)
    sibling = node.next_sibling
    sibling = sibling.next_sibling until sibling.nil? || sibling.element?

    sibling.name.downcase if sibling
  end

  # Recursively render +node+ and its children as text.
  #
  # @return [String]
  def iterate_over(node)
    return trimmed_whitespace(node.text) if node.text?
    return "" if IGNORED_ELEMENTS.include?(node.name.downcase)

    parts = [prefix_whitespace(node)]
    parts.concat(node.children.map { |child| iterate_over(child) })
    parts << suffix_whitespace(node)

    # prefix/suffix helpers return nil for elements that need no padding.
    output = parts.compact.join

    output = wrap_link(node, output) if node.name.downcase == "a"
    output
  end

  # Text emitted before an element's content.
  #
  # @return [String, nil] nil when the element needs no prefix
  def prefix_whitespace(node)
    case node.name.downcase
    when "hr"
      "------\n"
    when *HEADING_ELEMENTS, "ol", "ul", "tr", "p", "div"
      "\n"
    when "td", "th"
      "\t"
    when "li"
      "- "
    end
  end

  # Text emitted after an element's content.
  #
  # @return [String, nil] nil when the element needs no suffix
  def suffix_whitespace(node)
    case node.name.downcase
    when *HEADING_ELEMENTS, "li"
      "\n"
    when "p", "br"
      # A following div supplies its own leading newline.
      "\n" if next_node_name(node) != "div"
    when "div"
      # Add one line only if another, non-div element follows.
      following = next_node_name(node)
      "\n" unless following.nil? || following == "div"
    end
  end

  # Render an <a> element. Links are returned in [text](link) format unless
  # the href merely repeats the link text (optionally with a mailto:,
  # http:// or https:// prefix). Anchors with only a name attribute are
  # returned as [text]; anchors with neither attribute are returned as-is.
  #
  # @param node the <a> node
  # @param output [String] the already-rendered content of the link
  # @return [String]
  def wrap_link(node, output)
    href = node.attribute("href")
    name = node.attribute("name")

    if href.nil?
      output = "[#{output}]" unless name.nil?
    else
      href = href.to_s
      redundant = [output, "mailto:#{output}", "http://#{output}", "https://#{output}"]
      output = "[#{output}](#{href})" unless redundant.include?(href)
    end

    # Keep a link visually separate from a heading that directly follows it.
    output += "\n" if HEADING_ELEMENTS.include?(next_node_name(node))

    output
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
A document without any HTML open/closing tags.
|
2
|
+
|
3
|
+
<hr>
|
4
|
+
|
5
|
+
We try and use the representation given by common browsers of the
|
6
|
+
HTML document, so that it looks similar when converted to plain text.
|
7
|
+
|
8
|
+
<a href="http://foo.com">visit foo.com</a> - or <a href="http://www.foo.com">http://www.foo.com</a>
|
9
|
+
|
10
|
+
<a href="http://foo.com" title="a link with a title">link</a>
|
11
|
+
|
12
|
+
<h2><a name="anchor">An anchor which will not appear</a></h2>
|
@@ -0,0 +1,5 @@
|
|
1
|
+
A document without any HTML open/closing tags.
|
2
|
+
------
|
3
|
+
We try and use the representation given by common browsers of the HTML document, so that it looks similar when converted to plain text. [visit foo.com](http://foo.com) - or http://www.foo.com [link](http://foo.com)
|
4
|
+
|
5
|
+
[An anchor which will not appear]
|
@@ -0,0 +1,21 @@
|
|
1
|
+
<html>
|
2
|
+
<title>Ignored Title</title>
|
3
|
+
<body>
|
4
|
+
<h1>Hello, World!</h1>
|
5
|
+
|
6
|
+
<p>This is some e-mail content.
|
7
|
+
Even though it has whitespace and newlines, the e-mail converter
|
8
|
+
will handle it correctly.
|
9
|
+
|
10
|
+
<p>Even mismatched tags.</p>
|
11
|
+
|
12
|
+
<div>A div</div>
|
13
|
+
<div>Another div</div>
|
14
|
+
<div>A div<div>within a div</div></div>
|
15
|
+
|
16
|
+
<p>Another line<br />Yet another line</p>
|
17
|
+
|
18
|
+
<a href="http://foo.com">A link</a>
|
19
|
+
|
20
|
+
</body>
|
21
|
+
</html>
|
@@ -0,0 +1,13 @@
|
|
1
|
+
Hello, World!
|
2
|
+
|
3
|
+
This is some e-mail content. Even though it has whitespace and newlines, the e-mail converter will handle it correctly.
|
4
|
+
|
5
|
+
Even mismatched tags.
|
6
|
+
A div
|
7
|
+
Another div
|
8
|
+
A div
|
9
|
+
within a div
|
10
|
+
|
11
|
+
Another line
|
12
|
+
Yet another line
|
13
|
+
[A link](http://foo.com)
|
@@ -0,0 +1,24 @@
|
|
1
|
+
<h1>List tests</h1>
|
2
|
+
|
3
|
+
<p>
|
4
|
+
Add some lists.
|
5
|
+
</p>
|
6
|
+
|
7
|
+
<ol>
|
8
|
+
<li>one</li>
|
9
|
+
<li>two
|
10
|
+
<li>three
|
11
|
+
</ol>
|
12
|
+
|
13
|
+
<h2>An unordered list</h2>
|
14
|
+
|
15
|
+
<ul>
|
16
|
+
<li>one
|
17
|
+
<li>two</li>
|
18
|
+
<li>three</li>
|
19
|
+
</ul>
|
20
|
+
<ul>
|
21
|
+
<li>one
|
22
|
+
<li>two</li>
|
23
|
+
<li>three</li>
|
24
|
+
</ul>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
<h1>Anchor tests</h1>
|
2
|
+
|
3
|
+
<p>
|
4
|
+
Visit http://openiaml.org or <a href="http://openiaml.org">openiaml.org</a> or <a href="http://openiaml.org">http://openiaml.org</a>.
|
5
|
+
</p>
|
6
|
+
|
7
|
+
<p>
|
8
|
+
To visit with SSL, visit https://openiaml.org or <a href="https://openiaml.org">openiaml.org</a> or <a href="https://openiaml.org">https://openiaml.org</a>.
|
9
|
+
</p>
|
10
|
+
|
11
|
+
<p>
|
12
|
+
To mail, email support@openiaml.org or mailto:support@openiaml.org
|
13
|
+
or <a href="mailto:support@openiaml.org">support@openiaml.org</a> or <a href="mailto:support@openiaml.org">mailto:support@openiaml.org</a>.
|
14
|
+
</p>
|
@@ -0,0 +1,7 @@
|
|
1
|
+
Anchor tests
|
2
|
+
|
3
|
+
Visit http://openiaml.org or openiaml.org or http://openiaml.org.
|
4
|
+
|
5
|
+
To visit with SSL, visit https://openiaml.org or openiaml.org or https://openiaml.org.
|
6
|
+
|
7
|
+
To mail, email support@openiaml.org or mailto:support@openiaml.org or support@openiaml.org or mailto:support@openiaml.org.
|
@@ -0,0 +1 @@
|
|
1
|
+
hello world & people < > &NBSP;
|
@@ -0,0 +1 @@
|
|
1
|
+
hello world & people < > &NBSP;
|
@@ -0,0 +1,53 @@
|
|
1
|
+
<html>
|
2
|
+
<title>Ignored Title</title>
|
3
|
+
<body>
|
4
|
+
<h1>Hello, World!</h1>
|
5
|
+
<table>
|
6
|
+
<thead>
|
7
|
+
<tr>
|
8
|
+
<th>Col A</th>
|
9
|
+
<th>Col B</th>
|
10
|
+
</tr>
|
11
|
+
</thead>
|
12
|
+
<tbody>
|
13
|
+
<tr>
|
14
|
+
<td>
|
15
|
+
Data A1
|
16
|
+
</td>
|
17
|
+
<td>
|
18
|
+
Data B1
|
19
|
+
</td>
|
20
|
+
</tr>
|
21
|
+
<tr>
|
22
|
+
<td>
|
23
|
+
Data A2
|
24
|
+
</td>
|
25
|
+
<td>
|
26
|
+
Data B2
|
27
|
+
</td>
|
28
|
+
</tr>
|
29
|
+
<tr>
|
30
|
+
<td>
|
31
|
+
Data A3
|
32
|
+
</td>
|
33
|
+
<td>
|
34
|
+
Data B4
|
35
|
+
</td>
|
36
|
+
</tr>
|
37
|
+
</tbody>
|
38
|
+
<tfoot>
|
39
|
+
<tr>
|
40
|
+
<td>
|
41
|
+
Total A
|
42
|
+
</td>
|
43
|
+
<td>
|
44
|
+
Total B
|
45
|
+
</td>
|
46
|
+
</tr>
|
47
|
+
|
48
|
+
</tfoot>
|
49
|
+
|
50
|
+
</table>
|
51
|
+
|
52
|
+
</body>
|
53
|
+
</html>
|
@@ -0,0 +1 @@
|
|
1
|
+
test one<br />test two
|
@@ -0,0 +1 @@
|
|
1
|
+
1<br />2<br />3<br />4<br />5 6
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Fixture-driven specs: every spec/examples/*.html file is converted and
# compared against the .txt file of the same base name.
describe Html2Text do
  describe "#convert" do
    let(:text) { Html2Text.convert(html) }

    # Collected at load time so one example group is generated per fixture.
    examples = Dir[File.dirname(__FILE__) + "/examples/*.html"]

    examples.each do |filename|
      context filename do
        let(:html) { File.read(filename) }
        # Anchor on the extension: a plain sub(".html", ...) would rewrite the
        # first ".html" anywhere in the path (e.g. in a directory name).
        let(:text_file) { filename.sub(/\.html\z/, ".txt") }
        # Normalise line endings of the expected text the same way the
        # converter normalises its input.
        let(:expected) { Html2Text.fix_newlines(File.read(text_file)) }

        it "converts to text" do
          expect(text).to eq(expected)
        end
      end
    end

    # Guards against the glob silently matching nothing.
    it "has examples to test" do
      expect(examples.size).to_not eq(0)
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Unit specs for the conversion entry point and the line-trimming helper.
describe Html2Text do
  describe "#convert" do
    let(:text) { Html2Text.convert(html) }

    context "an empty line" do
      let(:html) { "" }

      it "is an empty line" do
        expect(text).to eq("")
      end
    end

    context "a simple string" do
      let(:html) { "hello world" }

      # Plain text without any markup passes through untouched.
      it "is returned unchanged" do
        expect(text).to eq("hello world")
      end
    end
  end

  describe "#remove_leading_and_trailing_whitespace" do
    # The helper does not touch the document, so a nil doc is sufficient.
    subject { Html2Text.new(nil).remove_leading_and_trailing_whitespace(input) }

    context "an empty string" do
      let(:input) { "" }
      it { is_expected.to eq("") }
    end

    context "many new lines" do
      let(:input) { "hello\n world \n yes" }
      it { is_expected.to eq("hello\nworld\nyes") }
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,156 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: html2text
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jevon Wright
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-12-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec-collection_matchers
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: colorize
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: A Ruby component to convert HTML into a plain text format.
|
84
|
+
email:
|
85
|
+
- jevon@powershop.co.nz
|
86
|
+
executables: []
|
87
|
+
extensions: []
|
88
|
+
extra_rdoc_files: []
|
89
|
+
files:
|
90
|
+
- MIT-LICENSE
|
91
|
+
- README.md
|
92
|
+
- lib/html2text.rb
|
93
|
+
- lib/html2text/version.rb
|
94
|
+
- spec/examples/anchors.html
|
95
|
+
- spec/examples/anchors.txt
|
96
|
+
- spec/examples/basic.html
|
97
|
+
- spec/examples/basic.txt
|
98
|
+
- spec/examples/lists.html
|
99
|
+
- spec/examples/lists.txt
|
100
|
+
- spec/examples/more-anchors.html
|
101
|
+
- spec/examples/more-anchors.txt
|
102
|
+
- spec/examples/nbsp.html
|
103
|
+
- spec/examples/nbsp.txt
|
104
|
+
- spec/examples/table.html
|
105
|
+
- spec/examples/table.txt
|
106
|
+
- spec/examples/test3.html
|
107
|
+
- spec/examples/test3.txt
|
108
|
+
- spec/examples/test4.html
|
109
|
+
- spec/examples/test4.txt
|
110
|
+
- spec/examples_spec.rb
|
111
|
+
- spec/html2text_spec.rb
|
112
|
+
- spec/spec_helper.rb
|
113
|
+
homepage: https://github.com/soundasleep/html2text_ruby
|
114
|
+
licenses:
|
115
|
+
- MIT
|
116
|
+
metadata: {}
|
117
|
+
post_install_message:
|
118
|
+
rdoc_options: []
|
119
|
+
require_paths:
|
120
|
+
- lib
|
121
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - ">="
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ">="
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
131
|
+
requirements: []
|
132
|
+
rubyforge_project:
|
133
|
+
rubygems_version: 2.4.5
|
134
|
+
signing_key:
|
135
|
+
specification_version: 4
|
136
|
+
summary: Convert HTML into plain text.
|
137
|
+
test_files:
|
138
|
+
- spec/examples/anchors.html
|
139
|
+
- spec/examples/anchors.txt
|
140
|
+
- spec/examples/basic.html
|
141
|
+
- spec/examples/basic.txt
|
142
|
+
- spec/examples/lists.html
|
143
|
+
- spec/examples/lists.txt
|
144
|
+
- spec/examples/more-anchors.html
|
145
|
+
- spec/examples/more-anchors.txt
|
146
|
+
- spec/examples/nbsp.html
|
147
|
+
- spec/examples/nbsp.txt
|
148
|
+
- spec/examples/table.html
|
149
|
+
- spec/examples/table.txt
|
150
|
+
- spec/examples/test3.html
|
151
|
+
- spec/examples/test3.txt
|
152
|
+
- spec/examples/test4.html
|
153
|
+
- spec/examples/test4.txt
|
154
|
+
- spec/examples_spec.rb
|
155
|
+
- spec/html2text_spec.rb
|
156
|
+
- spec/spec_helper.rb
|