html_to_plain_text 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/html_to_plain_text.rb +8 -5
- data/spec/html_to_plain_text_spec.rb +4 -4
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.1
|
1
|
+
1.0.2
|
data/lib/html_to_plain_text.rb
CHANGED
@@ -16,9 +16,10 @@ module HtmlToPlainText
|
|
16
16
|
OL = "ol".freeze
|
17
17
|
UL = "ul".freeze
|
18
18
|
LI = "li".freeze
|
19
|
-
NUMBERS = ["1", "a"]
|
20
|
-
ABSOLUTE_URL_PATTERN = /^[a-z]+:\/\/[a-z0-9]/i
|
21
|
-
HTML_PATTERN = /[<&]/
|
19
|
+
NUMBERS = ["1", "a"].freeze
|
20
|
+
ABSOLUTE_URL_PATTERN = /^[a-z]+:\/\/[a-z0-9]/i.freeze
|
21
|
+
HTML_PATTERN = /[<&]/.freeze
|
22
|
+
TRAILING_WHITESPACE = /[ \t]+$/.freeze
|
22
23
|
|
23
24
|
# Helper instance method for converting HTML into plain text. This method simply calls HtmlToPlainText.plain_text.
|
24
25
|
def plain_text(html)
|
@@ -63,8 +64,10 @@ module HtmlToPlainText
|
|
63
64
|
convert_node_to_plain_text(node, out, child_options(node, options))
|
64
65
|
|
65
66
|
if node.name == BR
|
67
|
+
out.sub!(TRAILING_WHITESPACE, '')
|
66
68
|
out << "\n"
|
67
69
|
elsif node.name == HR
|
70
|
+
out.sub!(TRAILING_WHITESPACE, '')
|
68
71
|
out << "\n" unless out.end_with?("\n")
|
69
72
|
out << "-------------------------------\n"
|
70
73
|
elsif node.name == TD || node.name == TH
|
@@ -104,7 +107,7 @@ module HtmlToPlainText
|
|
104
107
|
# Add double line breaks between paragraph elements. If line breaks already exist,
|
105
108
|
# new ones will only be added to get to two.
|
106
109
|
def append_paragraph_breaks(out)
|
107
|
-
out.
|
110
|
+
out.sub!(TRAILING_WHITESPACE, '')
|
108
111
|
if out.end_with?("\n")
|
109
112
|
out << "\n" unless out.end_with?("\n\n")
|
110
113
|
else
|
@@ -115,7 +118,7 @@ module HtmlToPlainText
|
|
115
118
|
# Add a single line break between block elements. If a line break already exists,
|
116
119
|
# none will be added.
|
117
120
|
def append_block_breaks(out)
|
118
|
-
out.
|
121
|
+
out.sub!(TRAILING_WHITESPACE, '')
|
119
122
|
out << "\n" unless out.end_with?("\n")
|
120
123
|
end
|
121
124
|
|
@@ -2,22 +2,22 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe HtmlToPlainText do
|
4
4
|
it "should format paragraph tags" do
|
5
|
-
html = "<h1>Test</h1><h2>More Test</h2
|
5
|
+
html = "<h1>Test</h1><h2>More Test</h2>\t \t<p>\n\tThis is a test\n</p>"
|
6
6
|
HtmlToPlainText.plain_text(html).should == "Test\n\nMore Test\n\nThis is a test"
|
7
7
|
end
|
8
8
|
|
9
9
|
it "should format block tags" do
|
10
|
-
html = "<div>Test</div><div>More Test<div
|
10
|
+
html = "<div>Test</div><div>More Test<div>\t This is a test\t </div></div>"
|
11
11
|
HtmlToPlainText.plain_text(html).should == "Test\nMore Test\nThis is a test"
|
12
12
|
end
|
13
13
|
|
14
14
|
it "should format <br> tags" do
|
15
|
-
html = "<div>Test</div><br><div>More Test<br />This is a test"
|
15
|
+
html = "<div>Test</div><br><div>More Test \t <br />This is a test"
|
16
16
|
HtmlToPlainText.plain_text(html).should == "Test\n\nMore Test\nThis is a test"
|
17
17
|
end
|
18
18
|
|
19
19
|
it "should format <hr> tags" do
|
20
|
-
html = "<div>Test</div><hr><div>More Test<hr />This is a test"
|
20
|
+
html = "<div>Test</div><hr><div>More Test \t <hr />This is a test"
|
21
21
|
HtmlToPlainText.plain_text(html).should == "Test\n-------------------------------\nMore Test\n-------------------------------\nThis is a test"
|
22
22
|
end
|
23
23
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html_to_plain_text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
- 1
|
10
|
-
version: 1.0.1
|
9
|
+
- 2
|
10
|
+
version: 1.0.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Brian Durand
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-08-05 00:00:00 -05:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|