html2text 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ <b>Hello &nbsnbsp; world</b>
2
+ <div class=">
3
+ Error
4
+ </div>
@@ -0,0 +1 @@
1
+ Hello &nbsnbsp; world
@@ -0,0 +1 @@
1
+ <html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=us-ascii"><meta name=Generator content="Microsoft Word 15 (filtered medium)"><style><!-- /* Font Definitions */ @font-face {font-family:"Cambria Math"; panose-1:2 4 5 3 5 4 6 3 2 4;} @font-face {font-family:Calibri; panose-1:2 15 5 2 2 2 4 3 2 4;} /* Style Definitions */ p.MsoNormal, li.MsoNormal, div.MsoNormal {margin:0cm; margin-bottom:.0001pt; font-size:11.0pt; font-family:"Calibri",sans-serif; mso-fareast-language:EN-US;} a:link, span.MsoHyperlink {mso-style-priority:99; color:#0563C1; text-decoration:underline;} a:visited, span.MsoHyperlinkFollowed {mso-style-priority:99; color:#954F72; text-decoration:underline;} span.EmailStyle17 {mso-style-type:personal-compose; font-family:"Calibri",sans-serif; color:windowtext;} .MsoChpDefault {mso-style-type:export-only; font-family:"Calibri",sans-serif; mso-fareast-language:EN-US;} @page WordSection1 {size:612.0pt 792.0pt; margin:72.0pt 72.0pt 72.0pt 72.0pt;} div.WordSection1 {page:WordSection1;} --></style><!--[if gte mso 9]><xml> <o:shapedefaults v:ext="edit" spidmax="1026" /> </xml><![endif]--><!--[if gte mso 9]><xml> <o:shapelayout v:ext="edit"> <o:idmap v:ext="edit" data="1" /> </o:shapelayout></xml><![endif]--></head><body lang=EN-GB link="#0563C1" vlink="#954F72"><div class=WordSection1><p class=MsoNormal>Dear html2text,<o:p></o:p></p><p class=MsoNormal><o:p>&nbsp;</o:p></p><p class=MsoNormal>This is an example email that can be used to test html2text conversion of outlook / exchange emails.<o:p></o:p></p><p class=MsoNormal><o:p>&nbsp;</o:p></p><p class=MsoNormal>The addition of &lt;o:p&gt; tags is very annoying!<o:p></o:p></p><p class=MsoNormal>This is a single line return<o:p></o:p></p><p class=MsoNormal><o:p>&nbsp;</o:p></p><p class=MsoNormal><b>This is bold<o:p></o:p></b></p><p class=MsoNormal><i>This is italic<o:p></o:p></i></p><p class=MsoNormal><u>This is underline<o:p></o:p></u></p><p class=MsoNormal><o:p>&nbsp;</o:p></p><p class=MsoNormal>Andrew<o:p></o:p></p></div></body></html>
@@ -0,0 +1,12 @@
1
+ Dear html2text,
2
+
3
+ This is an example email that can be used to test html2text conversion of outlook / exchange emails.
4
+
5
+ The addition of <o:p> tags is very annoying!
6
+ This is a single line return
7
+
8
+ This is bold
9
+ This is italic
10
+ This is underline
11
+
12
+ Andrew
@@ -0,0 +1,17 @@
1
+ <html>
2
+ <body>
3
+ <div>
4
+ Just two divs
5
+ </div>
6
+ <div>
7
+ Hanging out
8
+ </div>
9
+ <div><div><div>Nested divs and line breaks</div></div><br></div>
10
+ <div><div>Nested divs and line breaks</div>More text<br></div>
11
+ <div><br></div>
12
+ <div>Just text</div>
13
+ <div>Just text<br></div>
14
+ <div>Just text<br><br></div>
15
+ This is the end!
16
+ </body>
17
+ </html>
@@ -0,0 +1,12 @@
1
+ Just two divs
2
+ Hanging out
3
+ Nested divs and line breaks
4
+
5
+ Nested divs and line breaks
6
+ More text
7
+
8
+ Just text
9
+ Just text
10
+ Just text
11
+
12
+ This is the end!
@@ -0,0 +1,50 @@
1
+ <html>
2
+ <body>
3
+ <div>
4
+ Hello
5
+ <br>
6
+ </div>
7
+ <div>
8
+ How are you?
9
+ <br>
10
+ </div>
11
+
12
+ <p>
13
+ How are you?
14
+ <br>
15
+ </p>
16
+
17
+ <p>
18
+ How are you?
19
+ <br>
20
+ </p>
21
+
22
+ <div>
23
+ Just two divs
24
+ </div>
25
+ <div>
26
+ Hanging out
27
+ </div>
28
+
29
+ This is not the end!
30
+ <div>
31
+ How are you again?
32
+ <br>
33
+ </div>
34
+ This is the end!
35
+ <br>
36
+ Just kidding
37
+ <h1>Header 1</h1>
38
+ Some text
39
+ <hr>
40
+ Some more text
41
+ <p>Paragraph tag!</p>
42
+ <h2>Header 2</h2>
43
+ <hr>
44
+ <h3>Header 3</h3>
45
+ Some text
46
+ <h4>Header 4</h4>
47
+ <p>Paragraph tag!</p>
48
+ Final line
49
+ </body>
50
+ </html>
@@ -0,0 +1,35 @@
1
+ Hello
2
+ How are you?
3
+
4
+ How are you?
5
+
6
+ How are you?
7
+
8
+ Just two divs
9
+ Hanging out
10
+ This is not the end!
11
+ How are you again?
12
+ This is the end!
13
+ Just kidding
14
+
15
+ Header 1
16
+
17
+ Some text
18
+ ---------------------------------------------------------------
19
+ Some more text
20
+
21
+ Paragraph tag!
22
+
23
+ Header 2
24
+
25
+ ---------------------------------------------------------------
26
+
27
+ Header 3
28
+
29
+ Some text
30
+
31
+ Header 4
32
+
33
+ Paragraph tag!
34
+
35
+ Final line
@@ -0,0 +1 @@
1
+ these spaces are non-breaking
@@ -0,0 +1 @@
1
+ these spaces are non-breaking
@@ -0,0 +1,10 @@
1
+ Here is the code
2
+ <pre>
3
+ #include &lt;stdlib.h&gt;
4
+ #include &lt;stdio.h&gt;
5
+
6
+ int main(){
7
+ return 0;
8
+ };
9
+
10
+ </pre>
@@ -0,0 +1,8 @@
1
+ Here is the code
2
+
3
+ #include <stdlib.h>
4
+ #include <stdio.h>
5
+
6
+ int main(){
7
+ return 0;
8
+ };
@@ -1 +1 @@
1
- 1<br />2<br />3<br />4<br />5 6
1
+ 1<br />2<br />3<br />4<br />5 &lt; 6
@@ -1,5 +1,5 @@
1
- 1
2
- 2
3
- 3
4
- 4
5
- 5 6
1
+ 1
2
+ 2
3
+ 3
4
+ 4
5
+ 5 < 6
@@ -0,0 +1,4 @@
1
+ <ul>
2
+ <li>ÅÄÖ</li>
3
+ <li>åäö</li>
4
+ </ul>
@@ -0,0 +1,2 @@
1
+ - ÅÄÖ
2
+ - åäö
@@ -0,0 +1,4 @@
1
+ <ul>
2
+ <li>���</li>
3
+ <li>���</li>
4
+ </ul>
@@ -0,0 +1,2 @@
1
+ - ÅÄÖ
2
+ - åäö
@@ -0,0 +1 @@
1
+ <p>foo&zwnj;bar</p>
@@ -0,0 +1 @@
1
+ foobar
@@ -17,7 +17,19 @@ describe Html2Text do
17
17
  end
18
18
 
19
19
  it "converts to text" do
20
- expect(text).to eq(expected)
20
+ # Write the output if it failed, for easier comparison
21
+ if !text.eql?(expected)
22
+ File.open(filename.sub(".html", ".output"), 'w') do |fp|
23
+ fp.write(text)
24
+ end
25
+ end
26
+
27
+ # Quick check, don't try to generate a 500kb+ diff,
28
+ # which can halt the rspec for minutes+
29
+ expect(text.length).to eq expected.length if text.length > 10000
30
+
31
+ # More complete check
32
+ expect(text).to eq expected
21
33
  end
22
34
  end
23
35
  end
@@ -19,6 +19,27 @@ describe Html2Text do
19
19
  expect(text).to eq("hello world")
20
20
  end
21
21
  end
22
+
23
+ context "input value is non-string" do
24
+ let(:html) { nil }
25
+ it '(nil)' do
26
+ expect(text).to eq("")
27
+ end
28
+ end
29
+
30
+ context "input value is non-string" do
31
+ let(:html) { 1234 }
32
+ it "(number)" do
33
+ expect(text).to eq("1234")
34
+ end
35
+ end
36
+
37
+ context "input value is non-string" do
38
+ let(:html) { 1234.5600 }
39
+ it "(float number)" do
40
+ expect(text).to eq("1234.56")
41
+ end
42
+ end
22
43
  end
23
44
 
24
45
  describe "#remove_leading_and_trailing_whitespace" do
metadata CHANGED
@@ -1,92 +1,107 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jevon Wright
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-18 00:00:00.000000000 Z
11
+ date: 2019-06-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.6'
19
+ version: 1.10.3
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.6'
26
+ version: 1.10.3
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ! '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ! '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec-collection_matchers
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ! '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ! '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: colorize
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ! '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ! '>='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rake
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ! '>='
73
+ - - ">="
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ! '>='
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: bundler-audit
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
81
95
  - !ruby/object:Gem::Version
82
96
  version: '0'
83
97
  description: A Ruby component to convert HTML into a plain text format.
84
98
  email:
85
- - jevon@powershop.co.nz
99
+ - jevon@jevon.org
86
100
  executables: []
87
101
  extensions: []
88
102
  extra_rdoc_files: []
89
103
  files:
104
+ - CHANGELOG.md
90
105
  - LICENSE.md
91
106
  - README.md
92
107
  - lib/html2text.rb
@@ -95,22 +110,46 @@ files:
95
110
  - spec/examples/anchors.txt
96
111
  - spec/examples/basic.html
97
112
  - spec/examples/basic.txt
113
+ - spec/examples/dom-processing.html
114
+ - spec/examples/dom-processing.txt
115
+ - spec/examples/empty.html
116
+ - spec/examples/empty.txt
98
117
  - spec/examples/full_email.html
99
118
  - spec/examples/full_email.txt
119
+ - spec/examples/huge-msoffice.html
120
+ - spec/examples/huge-msoffice.txt
100
121
  - spec/examples/images.html
101
122
  - spec/examples/images.txt
123
+ - spec/examples/invalid.html
124
+ - spec/examples/invalid.txt
102
125
  - spec/examples/lists.html
103
126
  - spec/examples/lists.txt
104
127
  - spec/examples/more-anchors.html
105
128
  - spec/examples/more-anchors.txt
129
+ - spec/examples/msoffice.html
130
+ - spec/examples/msoffice.txt
106
131
  - spec/examples/nbsp.html
107
132
  - spec/examples/nbsp.txt
133
+ - spec/examples/nested-divs.html
134
+ - spec/examples/nested-divs.txt
135
+ - spec/examples/newlines.html
136
+ - spec/examples/newlines.txt
137
+ - spec/examples/non-breaking-spaces.html
138
+ - spec/examples/non-breaking-spaces.txt
139
+ - spec/examples/pre.html
140
+ - spec/examples/pre.txt
108
141
  - spec/examples/table.html
109
142
  - spec/examples/table.txt
110
143
  - spec/examples/test3.html
111
144
  - spec/examples/test3.txt
112
145
  - spec/examples/test4.html
113
146
  - spec/examples/test4.txt
147
+ - spec/examples/utf8-example.html
148
+ - spec/examples/utf8-example.txt
149
+ - spec/examples/windows-1252-example.html
150
+ - spec/examples/windows-1252-example.txt
151
+ - spec/examples/zero-width-non-joiners.html
152
+ - spec/examples/zero-width-non-joiners.txt
114
153
  - spec/examples_spec.rb
115
154
  - spec/html2text_spec.rb
116
155
  - spec/spec_helper.rb
@@ -124,41 +163,64 @@ require_paths:
124
163
  - lib
125
164
  required_ruby_version: !ruby/object:Gem::Requirement
126
165
  requirements:
127
- - - ! '>='
166
+ - - ">="
128
167
  - !ruby/object:Gem::Version
129
168
  version: '0'
130
169
  required_rubygems_version: !ruby/object:Gem::Requirement
131
170
  requirements:
132
- - - ! '>='
171
+ - - ">="
133
172
  - !ruby/object:Gem::Version
134
173
  version: '0'
135
174
  requirements: []
136
- rubyforge_project:
137
- rubygems_version: 2.4.5
175
+ rubygems_version: 3.0.3
138
176
  signing_key:
139
177
  specification_version: 4
140
178
  summary: Convert HTML into plain text.
141
179
  test_files:
142
- - spec/examples/anchors.html
143
- - spec/examples/anchors.txt
180
+ - spec/examples/nested-divs.html
181
+ - spec/examples/pre.html
182
+ - spec/examples/invalid.html
183
+ - spec/examples/empty.html
184
+ - spec/examples/table.html
144
185
  - spec/examples/basic.html
145
- - spec/examples/basic.txt
146
- - spec/examples/full_email.html
147
- - spec/examples/full_email.txt
148
- - spec/examples/images.html
149
- - spec/examples/images.txt
150
- - spec/examples/lists.html
151
- - spec/examples/lists.txt
152
- - spec/examples/more-anchors.html
153
- - spec/examples/more-anchors.txt
154
186
  - spec/examples/nbsp.html
187
+ - spec/examples/utf8-example.html
188
+ - spec/examples/newlines.txt
189
+ - spec/examples/full_email.txt
190
+ - spec/examples/msoffice.html
191
+ - spec/examples/zero-width-non-joiners.txt
192
+ - spec/examples/anchors.html
155
193
  - spec/examples/nbsp.txt
156
- - spec/examples/table.html
157
- - spec/examples/table.txt
194
+ - spec/examples/zero-width-non-joiners.html
158
195
  - spec/examples/test3.html
159
- - spec/examples/test3.txt
160
- - spec/examples/test4.html
161
196
  - spec/examples/test4.txt
197
+ - spec/examples/huge-msoffice.txt
198
+ - spec/examples/full_email.html
199
+ - spec/examples/utf8-example.txt
200
+ - spec/examples/table.txt
201
+ - spec/examples/huge-msoffice.html
202
+ - spec/examples/more-anchors.txt
203
+ - spec/examples/newlines.html
204
+ - spec/examples/test4.html
205
+ - spec/examples/basic.txt
206
+ - spec/examples/lists.html
207
+ - spec/examples/nested-divs.txt
208
+ - spec/examples/non-breaking-spaces.html
209
+ - spec/examples/invalid.txt
210
+ - spec/examples/empty.txt
211
+ - spec/examples/images.txt
212
+ - spec/examples/non-breaking-spaces.txt
213
+ - spec/examples/dom-processing.txt
214
+ - spec/examples/test3.txt
215
+ - spec/examples/dom-processing.html
216
+ - spec/examples/lists.txt
217
+ - spec/examples/pre.txt
218
+ - spec/examples/anchors.txt
219
+ - spec/examples/more-anchors.html
220
+ - spec/examples/windows-1252-example.txt
221
+ - spec/examples/images.html
222
+ - spec/examples/msoffice.txt
223
+ - spec/examples/windows-1252-example.html
162
224
  - spec/examples_spec.rb
163
225
  - spec/html2text_spec.rb
164
226
  - spec/spec_helper.rb