html_to_plain_text 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.1
1
+ 1.0.2
@@ -16,9 +16,10 @@ module HtmlToPlainText
16
16
  OL = "ol".freeze
17
17
  UL = "ul".freeze
18
18
  LI = "li".freeze
19
- NUMBERS = ["1", "a"]
20
- ABSOLUTE_URL_PATTERN = /^[a-z]+:\/\/[a-z0-9]/i
21
- HTML_PATTERN = /[<&]/
19
+ NUMBERS = ["1", "a"].freeze
20
+ ABSOLUTE_URL_PATTERN = /^[a-z]+:\/\/[a-z0-9]/i.freeze
21
+ HTML_PATTERN = /[<&]/.freeze
22
+ TRAILING_WHITESPACE = /[ \t]+$/.freeze
22
23
 
23
24
  # Helper instance method for converting HTML into plain text. This method simply calls HtmlToPlainText.plain_text.
24
25
  def plain_text(html)
@@ -63,8 +64,10 @@ module HtmlToPlainText
63
64
  convert_node_to_plain_text(node, out, child_options(node, options))
64
65
 
65
66
  if node.name == BR
67
+ out.sub!(TRAILING_WHITESPACE, '')
66
68
  out << "\n"
67
69
  elsif node.name == HR
70
+ out.sub!(TRAILING_WHITESPACE, '')
68
71
  out << "\n" unless out.end_with?("\n")
69
72
  out << "-------------------------------\n"
70
73
  elsif node.name == TD || node.name == TH
@@ -104,7 +107,7 @@ module HtmlToPlainText
104
107
  # Add double line breaks between paragraph elements. If line breaks already exist,
105
108
  # new ones will only be added to get to two.
106
109
  def append_paragraph_breaks(out)
107
- out.chomp!(" ")
110
+ out.sub!(TRAILING_WHITESPACE, '')
108
111
  if out.end_with?("\n")
109
112
  out << "\n" unless out.end_with?("\n\n")
110
113
  else
@@ -115,7 +118,7 @@ module HtmlToPlainText
115
118
  # Add a single line break between block elements. If a line break already exists,
116
119
  # none will be added.
117
120
  def append_block_breaks(out)
118
- out.chomp!(" ")
121
+ out.sub!(TRAILING_WHITESPACE, '')
119
122
  out << "\n" unless out.end_with?("\n")
120
123
  end
121
124
 
@@ -2,22 +2,22 @@ require 'spec_helper'
2
2
 
3
3
  describe HtmlToPlainText do
4
4
  it "should format paragraph tags" do
5
- html = "<h1>Test</h1><h2>More Test</h2><p>This is a test</p>"
5
+ html = "<h1>Test</h1><h2>More Test</h2>\t \t<p>\n\tThis is a test\n</p>"
6
6
  HtmlToPlainText.plain_text(html).should == "Test\n\nMore Test\n\nThis is a test"
7
7
  end
8
8
 
9
9
  it "should format block tags" do
10
- html = "<div>Test</div><div>More Test<div>This is a test</div></div>"
10
+ html = "<div>Test</div><div>More Test<div>\t This is a test\t </div></div>"
11
11
  HtmlToPlainText.plain_text(html).should == "Test\nMore Test\nThis is a test"
12
12
  end
13
13
 
14
14
  it "should format <br> tags" do
15
- html = "<div>Test</div><br><div>More Test<br />This is a test"
15
+ html = "<div>Test</div><br><div>More Test \t <br />This is a test"
16
16
  HtmlToPlainText.plain_text(html).should == "Test\n\nMore Test\nThis is a test"
17
17
  end
18
18
 
19
19
  it "should format <hr> tags" do
20
- html = "<div>Test</div><hr><div>More Test<hr />This is a test"
20
+ html = "<div>Test</div><hr><div>More Test \t <hr />This is a test"
21
21
  HtmlToPlainText.plain_text(html).should == "Test\n-------------------------------\nMore Test\n-------------------------------\nThis is a test"
22
22
  end
23
23
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html_to_plain_text
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 0
9
- - 1
10
- version: 1.0.1
9
+ - 2
10
+ version: 1.0.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Brian Durand
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-06-21 00:00:00 -05:00
18
+ date: 2011-08-05 00:00:00 -05:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency