html2text 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/CHANGELOG.md +50 -0
- data/README.md +19 -14
- data/lib/html2text/version.rb +3 -1
- data/lib/html2text.rb +158 -69
- metadata +90 -73
- data/spec/examples/anchors.html +0 -12
- data/spec/examples/anchors.txt +0 -5
- data/spec/examples/basic.html +0 -21
- data/spec/examples/basic.txt +0 -13
- data/spec/examples/full_email.html +0 -220
- data/spec/examples/full_email.txt +0 -54
- data/spec/examples/images.html +0 -54
- data/spec/examples/images.txt +0 -27
- data/spec/examples/lists.html +0 -24
- data/spec/examples/lists.txt +0 -17
- data/spec/examples/more-anchors.html +0 -14
- data/spec/examples/more-anchors.txt +0 -7
- data/spec/examples/nbsp.html +0 -1
- data/spec/examples/nbsp.txt +0 -1
- data/spec/examples/table.html +0 -53
- data/spec/examples/table.txt +0 -7
- data/spec/examples/test3.html +0 -1
- data/spec/examples/test3.txt +0 -2
- data/spec/examples/test4.html +0 -1
- data/spec/examples/test4.txt +0 -5
- data/spec/examples_spec.rb +0 -29
- data/spec/html2text_spec.rb +0 -37
- data/spec/spec_helper.rb +0 -4
data/spec/examples/images.txt
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
One:
|
2
|
-
|
3
|
-
Two: [two]
|
4
|
-
|
5
|
-
Three: [three]
|
6
|
-
|
7
|
-
Four: [four]
|
8
|
-
|
9
|
-
With links
|
10
|
-
|
11
|
-
One: http://localhost
|
12
|
-
|
13
|
-
Two: [two](http://localhost)
|
14
|
-
|
15
|
-
Three: [three](http://localhost)
|
16
|
-
|
17
|
-
Four: [four](http://localhost)
|
18
|
-
|
19
|
-
With links with titles
|
20
|
-
|
21
|
-
One: [one link](http://localhost)
|
22
|
-
|
23
|
-
Two: [two link](http://localhost)
|
24
|
-
|
25
|
-
Three: [three link](http://localhost)
|
26
|
-
|
27
|
-
Four: [four link](http://localhost)
|
data/spec/examples/lists.html
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
<h1>List tests</h1>
|
2
|
-
|
3
|
-
<p>
|
4
|
-
Add some lists.
|
5
|
-
</p>
|
6
|
-
|
7
|
-
<ol>
|
8
|
-
<li>one</li>
|
9
|
-
<li>two
|
10
|
-
<li>three
|
11
|
-
</ol>
|
12
|
-
|
13
|
-
<h2>An unordered list</h2>
|
14
|
-
|
15
|
-
<ul>
|
16
|
-
<li>one
|
17
|
-
<li>two</li>
|
18
|
-
<li>three</li>
|
19
|
-
</ul>
|
20
|
-
<ul>
|
21
|
-
<li>one
|
22
|
-
<li>two</li>
|
23
|
-
<li>three</li>
|
24
|
-
</ul>
|
data/spec/examples/lists.txt
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
<h1>Anchor tests</h1>
|
2
|
-
|
3
|
-
<p>
|
4
|
-
Visit http://openiaml.org or <a href="http://openiaml.org">openiaml.org</a> or <a href="http://openiaml.org">http://openiaml.org</a>.
|
5
|
-
</p>
|
6
|
-
|
7
|
-
<p>
|
8
|
-
To visit with SSL, visit https://openiaml.org or <a href="https://openiaml.org">openiaml.org</a> or <a href="https://openiaml.org">https://openiaml.org</a>.
|
9
|
-
</p>
|
10
|
-
|
11
|
-
<p>
|
12
|
-
To mail, email support@openiaml.org or mailto:support@openiaml.org
|
13
|
-
or <a href="mailto:support@openiaml.org">support@openiaml.org</a> or <a href="mailto:support@openiaml.org">mailto:support@openiaml.org</a>.
|
14
|
-
</p>
|
@@ -1,7 +0,0 @@
|
|
1
|
-
Anchor tests
|
2
|
-
|
3
|
-
Visit http://openiaml.org or openiaml.org or http://openiaml.org.
|
4
|
-
|
5
|
-
To visit with SSL, visit https://openiaml.org or openiaml.org or https://openiaml.org.
|
6
|
-
|
7
|
-
To mail, email support@openiaml.org or mailto:support@openiaml.org or support@openiaml.org or mailto:support@openiaml.org.
|
data/spec/examples/nbsp.html
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
hello world & people < > &NBSP;
|
data/spec/examples/nbsp.txt
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
hello world & people < > &NBSP;
|
data/spec/examples/table.html
DELETED
@@ -1,53 +0,0 @@
|
|
1
|
-
<html>
|
2
|
-
<title>Ignored Title</title>
|
3
|
-
<body>
|
4
|
-
<h1>Hello, World!</h1>
|
5
|
-
<table>
|
6
|
-
<thead>
|
7
|
-
<tr>
|
8
|
-
<th>Col A</th>
|
9
|
-
<th>Col B</th>
|
10
|
-
</tr>
|
11
|
-
</thead>
|
12
|
-
<tbody>
|
13
|
-
<tr>
|
14
|
-
<td>
|
15
|
-
Data A1
|
16
|
-
</td>
|
17
|
-
<td>
|
18
|
-
Data B1
|
19
|
-
</td>
|
20
|
-
</tr>
|
21
|
-
<tr>
|
22
|
-
<td>
|
23
|
-
Data A2
|
24
|
-
</td>
|
25
|
-
<td>
|
26
|
-
Data B2
|
27
|
-
</td>
|
28
|
-
</tr>
|
29
|
-
<tr>
|
30
|
-
<td>
|
31
|
-
Data A3
|
32
|
-
</td>
|
33
|
-
<td>
|
34
|
-
Data B4
|
35
|
-
</td>
|
36
|
-
</tr>
|
37
|
-
</tbody>
|
38
|
-
<tfoot>
|
39
|
-
<tr>
|
40
|
-
<td>
|
41
|
-
Total A
|
42
|
-
</td>
|
43
|
-
<td>
|
44
|
-
Total B
|
45
|
-
</td>
|
46
|
-
</tr>
|
47
|
-
|
48
|
-
</tfoot>
|
49
|
-
|
50
|
-
</table>
|
51
|
-
|
52
|
-
</body>
|
53
|
-
</html>
|
data/spec/examples/table.txt
DELETED
data/spec/examples/test3.html
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
test one<br />test two
|
data/spec/examples/test3.txt
DELETED
data/spec/examples/test4.html
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
1<br />2<br />3<br />4<br />5 6
|
data/spec/examples_spec.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe Html2Text do
|
4
|
-
describe "#convert" do
|
5
|
-
let(:text) { Html2Text.convert(html) }
|
6
|
-
|
7
|
-
examples = Dir[File.dirname(__FILE__) + "/examples/*.html"]
|
8
|
-
|
9
|
-
examples.each do |filename|
|
10
|
-
context "#{filename}" do
|
11
|
-
let(:html) { File.read(filename) }
|
12
|
-
let(:text_file) { filename.sub(".html", ".txt") }
|
13
|
-
let(:expected) { Html2Text.fix_newlines(File.read(text_file)) }
|
14
|
-
|
15
|
-
it "has an expected output" do
|
16
|
-
expect(File.exist?(text_file)).to eq(true), "'#{text_file}' did not exist"
|
17
|
-
end
|
18
|
-
|
19
|
-
it "converts to text" do
|
20
|
-
expect(text).to eq(expected)
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
it "has examples to test" do
|
26
|
-
expect(examples.size).to_not eq(0)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
data/spec/html2text_spec.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe Html2Text do
|
4
|
-
describe "#convert" do
|
5
|
-
let(:text) { Html2Text.convert(html) }
|
6
|
-
|
7
|
-
context "an empty line" do
|
8
|
-
let(:html) { "" }
|
9
|
-
|
10
|
-
it "is an empty line" do
|
11
|
-
expect(text).to eq("")
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
context "a simple string" do
|
16
|
-
let(:html) { "hello world" }
|
17
|
-
|
18
|
-
it "is an empty line" do
|
19
|
-
expect(text).to eq("hello world")
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
describe "#remove_leading_and_trailing_whitespace" do
|
25
|
-
let(:subject) { Html2Text.new(nil).remove_leading_and_trailing_whitespace(input) }
|
26
|
-
|
27
|
-
context "an empty string" do
|
28
|
-
let(:input) { "" }
|
29
|
-
it { is_expected.to eq("") }
|
30
|
-
end
|
31
|
-
|
32
|
-
context "many new lines" do
|
33
|
-
let(:input) { "hello\n world \n yes" }
|
34
|
-
it { is_expected.to eq("hello\nworld\nyes") }
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
data/spec/spec_helper.rb
DELETED