html2text 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +70 -0
- data/lib/html2text.rb +138 -0
- data/lib/html2text/version.rb +3 -0
- data/spec/examples/anchors.html +12 -0
- data/spec/examples/anchors.txt +5 -0
- data/spec/examples/basic.html +21 -0
- data/spec/examples/basic.txt +13 -0
- data/spec/examples/lists.html +24 -0
- data/spec/examples/lists.txt +17 -0
- data/spec/examples/more-anchors.html +14 -0
- data/spec/examples/more-anchors.txt +7 -0
- data/spec/examples/nbsp.html +1 -0
- data/spec/examples/nbsp.txt +1 -0
- data/spec/examples/table.html +53 -0
- data/spec/examples/table.txt +7 -0
- data/spec/examples/test3.html +1 -0
- data/spec/examples/test3.txt +2 -0
- data/spec/examples/test4.html +1 -0
- data/spec/examples/test4.txt +5 -0
- data/spec/examples_spec.rb +25 -0
- data/spec/html2text_spec.rb +37 -0
- data/spec/spec_helper.rb +4 -0
- metadata +156 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7c84c460e75e64099fa12a010871f9859ab48b9f
|
4
|
+
data.tar.gz: ea56a52568f22804cdcbc44b5f35e6b99164ea6c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a3833c4546b86912872d777fc57be15cc0fac89e273e5ad65b6714a0b723f4815a81a3865e9ee0b05746ef7dee356baf5824ace242ab914d26eb79bf3aa6bf65
|
7
|
+
data.tar.gz: 737d869f81c782f93d520e935bb5b26a0a88798f940b60856519a084eabd1dfca84171d673f3abd5e73ecf0f84917909573cd6d92a67510fcdfcc075c4a676ed
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright 2015 Jevon Wright
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
html2text [![Build Status](https://travis-ci.org/soundasleep/html2text_ruby.svg?branch=master)](https://travis-ci.org/soundasleep/html2text_ruby)
|
2
|
+
==============
|
3
|
+
|
4
|
+
`html2text` is a very simple script that uses Ruby's DOM methods to load HTML from a string, and then iterates over the resulting DOM to correctly output plain text. For example:
|
5
|
+
|
6
|
+
```html
|
7
|
+
<html>
|
8
|
+
<title>Ignored Title</title>
|
9
|
+
<body>
|
10
|
+
<h1>Hello, World!</h1>
|
11
|
+
|
12
|
+
<p>This is some e-mail content.
|
13
|
+
Even though it has whitespace and newlines, the e-mail converter
|
14
|
+
will handle it correctly.
|
15
|
+
|
16
|
+
<p>Even mismatched tags.</p>
|
17
|
+
|
18
|
+
<div>A div</div>
|
19
|
+
<div>Another div</div>
|
20
|
+
<div>A div<div>within a div</div></div>
|
21
|
+
|
22
|
+
<a href="http://foo.com">A link</a>
|
23
|
+
|
24
|
+
</body>
|
25
|
+
</html>
|
26
|
+
```
|
27
|
+
|
28
|
+
Will be converted into:
|
29
|
+
|
30
|
+
```text
|
31
|
+
Hello, World!
|
32
|
+
|
33
|
+
This is some e-mail content. Even though it has whitespace and newlines, the e-mail converter will handle it correctly.
|
34
|
+
|
35
|
+
Even mismatched tags.
|
36
|
+
A div
|
37
|
+
Another div
|
38
|
+
A div
|
39
|
+
within a div
|
40
|
+
[A link](http://foo.com)
|
41
|
+
```
|
42
|
+
|
43
|
+
See the [original blog post](http://journals.jevon.org/users/jevon-phd/entry/19818) or the related [StackOverflow answer](http://stackoverflow.com/a/2564472/39531).
|
44
|
+
|
45
|
+
## Installing
|
46
|
+
|
47
|
+
Install the gem (`gem install html2text`), then you can:
|
48
|
+
|
49
|
+
```ruby
|
50
|
+
require 'html2text'
|
51
|
+
|
52
|
+
text = Html2Text.convert(html)
|
53
|
+
```
|
54
|
+
|
55
|
+
## Tests
|
56
|
+
|
57
|
+
See all of the test cases defined in [spec/examples/](spec/examples/). These can be run with:
|
58
|
+
|
59
|
+
```
|
60
|
+
bundle install
|
61
|
+
rspec
|
62
|
+
```
|
63
|
+
|
64
|
+
## License
|
65
|
+
|
66
|
+
`html2text` is licensed under MIT.
|
67
|
+
|
68
|
+
## Other versions
|
69
|
+
|
70
|
+
Also see [html2text](https://github.com/soundasleep/html2text), the original PHP implementation.
|
data/lib/html2text.rb
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
# Converts an HTML document into plain text by walking a parsed DOM tree.
#
# The class-level entry point is Html2Text.convert(html), which parses the
# markup with Nokogiri (required at the top of this file) and then renders
# the resulting tree. Links are rendered as "[text](href)".
class Html2Text
  # Elements whose whole subtree is dropped from the output.
  IGNORED_TAGS = %w[style head title meta script].freeze

  # Heading elements; they get a blank line before and after.
  HEADING_TAGS = %w[h1 h2 h3 h4 h5 h6].freeze

  # The parsed document (or any node-like object) being rendered.
  attr_reader :doc

  # @param doc the root node to render; it must respond to the node
  #   methods used below (text?, text, name, children, next_sibling,
  #   element?, attribute).
  def initialize(doc)
    @doc = doc
  end

  # Converts an HTML string into plain text.
  #
  # @param html [String] the HTML markup
  # @return [String] the plain-text rendering, stripped of outer whitespace
  def self.convert(html)
    html = fix_newlines(replace_entities(html))
    doc = Nokogiri::HTML(html)

    Html2Text.new(doc).convert
  end

  # Normalises Windows (\r\n) and bare \r line endings to \n.
  #
  # @param text [String]
  # @return [String]
  def self.fix_newlines(text)
    text.gsub("\r\n", "\n").gsub("\r", "\n")
  end

  # Replaces the non-breaking-space entity with a plain space before
  # parsing, so it is treated like ordinary whitespace. The match is
  # case-sensitive on purpose: "&NBSP;" is not a valid entity and is left
  # untouched.
  #
  # @param text [String]
  # @return [String]
  def self.replace_entities(text)
    text.gsub("&nbsp;", " ")
  end

  # Renders the document held by this instance.
  #
  # @return [String] the plain-text rendering
  def convert
    output = iterate_over(doc)
    output = remove_leading_and_trailing_whitespace(output)
    output.strip
  end

  # Removes spaces and tabs on either side of every newline, so that no
  # line starts or ends with horizontal whitespace next to a line break.
  #
  # @param text [String]
  # @return [String]
  def remove_leading_and_trailing_whitespace(text)
    text.gsub(/[ \t]*\n[ \t]*/, "\n")
  end

  # Collapses each run of tabs, newlines, form feeds, carriage returns and
  # spaces into a single space.
  #
  # @param text [String]
  # @return [String]
  def trimmed_whitespace(text)
    text.gsub(/[\t\n\f\r ]+/, " ")
  end

  # Finds the name of the next sibling that is an element, skipping over
  # non-element siblings.
  #
  # @param node the node whose following siblings are inspected
  # @return [String, nil] the lower-cased element name, or nil if there is
  #   no following element
  def next_node_name(node)
    sibling = node.next_sibling
    sibling = sibling.next_sibling until sibling.nil? || sibling.element?

    sibling.name.downcase if sibling
  end

  # Recursively renders a node and its children.
  #
  # @param node the node to render
  # @return [String] the text for this subtree
  def iterate_over(node)
    return trimmed_whitespace(node.text) if node.text?
    return "" if IGNORED_TAGS.include?(node.name.downcase)

    parts = [prefix_whitespace(node)]
    parts.concat(node.children.map { |child| iterate_over(child) })
    parts << suffix_whitespace(node)

    # prefix/suffix helpers return nil when nothing is to be emitted
    output = parts.compact.join
    output = wrap_link(node, output) if node.name.downcase == "a"
    output
  end

  # Text emitted before an element's content.
  #
  # @param node the element being rendered
  # @return [String, nil] the prefix, or nil for elements without one
  def prefix_whitespace(node)
    case node.name.downcase
    when "hr"
      "------\n"

    when *HEADING_TAGS, "ol", "ul"
      "\n"

    when "tr", "p", "div"
      "\n"

    when "td", "th"
      "\t"

    when "li"
      "- "
    end
  end

  # Text emitted after an element's content.
  #
  # @param node the element being rendered
  # @return [String, nil] the suffix, or nil when nothing is appended
  def suffix_whitespace(node)
    case node.name.downcase
    when *HEADING_TAGS
      # add another line
      "\n"

    when "p", "br"
      "\n" if next_node_name(node) != "div"

    when "li"
      "\n"

    when "div"
      # add one line only if there is a following element and it isn't a div
      following = next_node_name(node)
      "\n" if following && following != "div"
    end
  end

  # Formats the rendered content of an <a> element.
  #
  # Links are returned in [text](link) format. A link whose text already
  # equals its href (optionally without the mailto:, http:// or https://
  # prefix) is returned as the bare text. An anchor without href but with
  # a name attribute is returned as [text].
  #
  # @param node the <a> element
  # @param output [String] the already-rendered link content
  # @return [String]
  def wrap_link(node, output)
    href = node.attribute("href")
    name = node.attribute("name")

    if href.nil?
      output = "[#{output}]" unless name.nil?
    else
      href = href.to_s
      plain_forms = [output, "mailto:#{output}", "http://#{output}", "https://#{output}"]
      output = "[#{output}](#{href})" unless plain_forms.include?(href)
    end

    # keep a link that directly precedes a heading on its own line
    output += "\n" if HEADING_TAGS.include?(next_node_name(node))

    output
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
A document without any HTML open/closing tags.
|
2
|
+
|
3
|
+
<hr>
|
4
|
+
|
5
|
+
We try and use the representation given by common browsers of the
|
6
|
+
HTML document, so that it looks similar when converted to plain text.
|
7
|
+
|
8
|
+
<a href="http://foo.com">visit foo.com</a> - or <a href="http://www.foo.com">http://www.foo.com</a>
|
9
|
+
|
10
|
+
<a href="http://foo.com" title="a link with a title">link</a>
|
11
|
+
|
12
|
+
<h2><a name="anchor">An anchor which will not appear</a></h2>
|
@@ -0,0 +1,5 @@
|
|
1
|
+
A document without any HTML open/closing tags.
|
2
|
+
------
|
3
|
+
We try and use the representation given by common browsers of the HTML document, so that it looks similar when converted to plain text. [visit foo.com](http://foo.com) - or http://www.foo.com [link](http://foo.com)
|
4
|
+
|
5
|
+
[An anchor which will not appear]
|
@@ -0,0 +1,21 @@
|
|
1
|
+
<html>
|
2
|
+
<title>Ignored Title</title>
|
3
|
+
<body>
|
4
|
+
<h1>Hello, World!</h1>
|
5
|
+
|
6
|
+
<p>This is some e-mail content.
|
7
|
+
Even though it has whitespace and newlines, the e-mail converter
|
8
|
+
will handle it correctly.
|
9
|
+
|
10
|
+
<p>Even mismatched tags.</p>
|
11
|
+
|
12
|
+
<div>A div</div>
|
13
|
+
<div>Another div</div>
|
14
|
+
<div>A div<div>within a div</div></div>
|
15
|
+
|
16
|
+
<p>Another line<br />Yet another line</p>
|
17
|
+
|
18
|
+
<a href="http://foo.com">A link</a>
|
19
|
+
|
20
|
+
</body>
|
21
|
+
</html>
|
@@ -0,0 +1,13 @@
|
|
1
|
+
Hello, World!
|
2
|
+
|
3
|
+
This is some e-mail content. Even though it has whitespace and newlines, the e-mail converter will handle it correctly.
|
4
|
+
|
5
|
+
Even mismatched tags.
|
6
|
+
A div
|
7
|
+
Another div
|
8
|
+
A div
|
9
|
+
within a div
|
10
|
+
|
11
|
+
Another line
|
12
|
+
Yet another line
|
13
|
+
[A link](http://foo.com)
|
@@ -0,0 +1,24 @@
|
|
1
|
+
<h1>List tests</h1>
|
2
|
+
|
3
|
+
<p>
|
4
|
+
Add some lists.
|
5
|
+
</p>
|
6
|
+
|
7
|
+
<ol>
|
8
|
+
<li>one</li>
|
9
|
+
<li>two
|
10
|
+
<li>three
|
11
|
+
</ol>
|
12
|
+
|
13
|
+
<h2>An unordered list</h2>
|
14
|
+
|
15
|
+
<ul>
|
16
|
+
<li>one
|
17
|
+
<li>two</li>
|
18
|
+
<li>three</li>
|
19
|
+
</ul>
|
20
|
+
<ul>
|
21
|
+
<li>one
|
22
|
+
<li>two</li>
|
23
|
+
<li>three</li>
|
24
|
+
</ul>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
<h1>Anchor tests</h1>
|
2
|
+
|
3
|
+
<p>
|
4
|
+
Visit http://openiaml.org or <a href="http://openiaml.org">openiaml.org</a> or <a href="http://openiaml.org">http://openiaml.org</a>.
|
5
|
+
</p>
|
6
|
+
|
7
|
+
<p>
|
8
|
+
To visit with SSL, visit https://openiaml.org or <a href="https://openiaml.org">openiaml.org</a> or <a href="https://openiaml.org">https://openiaml.org</a>.
|
9
|
+
</p>
|
10
|
+
|
11
|
+
<p>
|
12
|
+
To mail, email support@openiaml.org or mailto:support@openiaml.org
|
13
|
+
or <a href="mailto:support@openiaml.org">support@openiaml.org</a> or <a href="mailto:support@openiaml.org">mailto:support@openiaml.org</a>.
|
14
|
+
</p>
|
@@ -0,0 +1,7 @@
|
|
1
|
+
Anchor tests
|
2
|
+
|
3
|
+
Visit http://openiaml.org or openiaml.org or http://openiaml.org.
|
4
|
+
|
5
|
+
To visit with SSL, visit https://openiaml.org or openiaml.org or https://openiaml.org.
|
6
|
+
|
7
|
+
To mail, email support@openiaml.org or mailto:support@openiaml.org or support@openiaml.org or mailto:support@openiaml.org.
|
@@ -0,0 +1 @@
|
|
1
|
+
hello&nbsp;world &amp; people &lt; &gt; &NBSP;
|
@@ -0,0 +1 @@
|
|
1
|
+
hello world & people < > &NBSP;
|
@@ -0,0 +1,53 @@
|
|
1
|
+
<html>
|
2
|
+
<title>Ignored Title</title>
|
3
|
+
<body>
|
4
|
+
<h1>Hello, World!</h1>
|
5
|
+
<table>
|
6
|
+
<thead>
|
7
|
+
<tr>
|
8
|
+
<th>Col A</th>
|
9
|
+
<th>Col B</th>
|
10
|
+
</tr>
|
11
|
+
</thead>
|
12
|
+
<tbody>
|
13
|
+
<tr>
|
14
|
+
<td>
|
15
|
+
Data A1
|
16
|
+
</td>
|
17
|
+
<td>
|
18
|
+
Data B1
|
19
|
+
</td>
|
20
|
+
</tr>
|
21
|
+
<tr>
|
22
|
+
<td>
|
23
|
+
Data A2
|
24
|
+
</td>
|
25
|
+
<td>
|
26
|
+
Data B2
|
27
|
+
</td>
|
28
|
+
</tr>
|
29
|
+
<tr>
|
30
|
+
<td>
|
31
|
+
Data A3
|
32
|
+
</td>
|
33
|
+
<td>
|
34
|
+
Data B4
|
35
|
+
</td>
|
36
|
+
</tr>
|
37
|
+
</tbody>
|
38
|
+
<tfoot>
|
39
|
+
<tr>
|
40
|
+
<td>
|
41
|
+
Total A
|
42
|
+
</td>
|
43
|
+
<td>
|
44
|
+
Total B
|
45
|
+
</td>
|
46
|
+
</tr>
|
47
|
+
|
48
|
+
</tfoot>
|
49
|
+
|
50
|
+
</table>
|
51
|
+
|
52
|
+
</body>
|
53
|
+
</html>
|
@@ -0,0 +1 @@
|
|
1
|
+
test one<br />test two
|
@@ -0,0 +1 @@
|
|
1
|
+
1<br />2<br />3<br />4<br />5 6
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require "spec_helper"

# Data-driven spec: every spec/examples/*.html file is converted and the
# result compared with the contents of the .txt file of the same name.
describe Html2Text do
  describe "#convert" do
    let(:text) { Html2Text.convert(html) }

    # Sorted so the examples run in the same order on every platform.
    examples = Dir[File.join(File.dirname(__FILE__), "examples", "*.html")].sort

    examples.each do |filename|
      context filename do
        let(:html) { File.read(filename) }
        # Only swap the trailing extension; a ".html" elsewhere in the path
        # (e.g. in a directory name) must not be rewritten.
        let(:text_file) { filename.sub(/\.html\z/, ".txt") }
        let(:expected) { Html2Text.fix_newlines(File.read(text_file)) }

        it "converts to text" do
          expect(text).to eq(expected)
        end
      end
    end

    # Guards against the glob silently matching nothing.
    it "has examples to test" do
      expect(examples.size).to_not eq(0)
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require "spec_helper"

# Unit specs for the public conversion entry point and the line-trimming
# helper of Html2Text.
describe Html2Text do
  describe "#convert" do
    let(:text) { Html2Text.convert(html) }

    context "an empty line" do
      let(:html) { "" }

      it "is an empty line" do
        expect(text).to eq("")
      end
    end

    context "a simple string" do
      let(:html) { "hello world" }

      it "is returned unchanged" do
        expect(text).to eq("hello world")
      end
    end
  end

  describe "#remove_leading_and_trailing_whitespace" do
    # The helper does not read the document, so nil is passed as the doc.
    subject { Html2Text.new(nil).remove_leading_and_trailing_whitespace(input) }

    context "an empty string" do
      let(:input) { "" }
      it { is_expected.to eq("") }
    end

    context "many new lines" do
      let(:input) { "hello\n world \n yes" }
      it { is_expected.to eq("hello\nworld\nyes") }
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,156 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: html2text
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jevon Wright
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-12-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec-collection_matchers
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: colorize
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: A Ruby component to convert HTML into a plain text format.
|
84
|
+
email:
|
85
|
+
- jevon@powershop.co.nz
|
86
|
+
executables: []
|
87
|
+
extensions: []
|
88
|
+
extra_rdoc_files: []
|
89
|
+
files:
|
90
|
+
- MIT-LICENSE
|
91
|
+
- README.md
|
92
|
+
- lib/html2text.rb
|
93
|
+
- lib/html2text/version.rb
|
94
|
+
- spec/examples/anchors.html
|
95
|
+
- spec/examples/anchors.txt
|
96
|
+
- spec/examples/basic.html
|
97
|
+
- spec/examples/basic.txt
|
98
|
+
- spec/examples/lists.html
|
99
|
+
- spec/examples/lists.txt
|
100
|
+
- spec/examples/more-anchors.html
|
101
|
+
- spec/examples/more-anchors.txt
|
102
|
+
- spec/examples/nbsp.html
|
103
|
+
- spec/examples/nbsp.txt
|
104
|
+
- spec/examples/table.html
|
105
|
+
- spec/examples/table.txt
|
106
|
+
- spec/examples/test3.html
|
107
|
+
- spec/examples/test3.txt
|
108
|
+
- spec/examples/test4.html
|
109
|
+
- spec/examples/test4.txt
|
110
|
+
- spec/examples_spec.rb
|
111
|
+
- spec/html2text_spec.rb
|
112
|
+
- spec/spec_helper.rb
|
113
|
+
homepage: https://github.com/soundasleep/html2text_ruby
|
114
|
+
licenses:
|
115
|
+
- MIT
|
116
|
+
metadata: {}
|
117
|
+
post_install_message:
|
118
|
+
rdoc_options: []
|
119
|
+
require_paths:
|
120
|
+
- lib
|
121
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - ">="
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ">="
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
131
|
+
requirements: []
|
132
|
+
rubyforge_project:
|
133
|
+
rubygems_version: 2.4.5
|
134
|
+
signing_key:
|
135
|
+
specification_version: 4
|
136
|
+
summary: Convert HTML into plain text.
|
137
|
+
test_files:
|
138
|
+
- spec/examples/anchors.html
|
139
|
+
- spec/examples/anchors.txt
|
140
|
+
- spec/examples/basic.html
|
141
|
+
- spec/examples/basic.txt
|
142
|
+
- spec/examples/lists.html
|
143
|
+
- spec/examples/lists.txt
|
144
|
+
- spec/examples/more-anchors.html
|
145
|
+
- spec/examples/more-anchors.txt
|
146
|
+
- spec/examples/nbsp.html
|
147
|
+
- spec/examples/nbsp.txt
|
148
|
+
- spec/examples/table.html
|
149
|
+
- spec/examples/table.txt
|
150
|
+
- spec/examples/test3.html
|
151
|
+
- spec/examples/test3.txt
|
152
|
+
- spec/examples/test4.html
|
153
|
+
- spec/examples/test4.txt
|
154
|
+
- spec/examples_spec.rb
|
155
|
+
- spec/html2text_spec.rb
|
156
|
+
- spec/spec_helper.rb
|