html_to_plain_text 1.0.4 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/VERSION +1 -1
  3. data/lib/html_to_plain_text.rb +8 -2
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d2fcc1c79047bd747cb209680ee94d2e4e8a0f41
4
- data.tar.gz: 3e31ccae62165b880f23447420ac370e5bd6eb19
3
+ metadata.gz: 2297205c53aeb650d7bacf92fd72e9d259fde896
4
+ data.tar.gz: dca3dc23d7916059863ded0b723d81df9384c818
5
5
  SHA512:
6
- metadata.gz: 642edbc9855a5a9a0a00a204e077f32e57efc0141843fcb2355c3fd86881ef2afa7672526dec45e48195c2fe8010e9751de9dd5e4bbb4e95437fe124bfbcd6ef
7
- data.tar.gz: 73ee95b2609f709c19187a5b1bd93025daa40af0d730fe69623d8a4be176cf8a5858526bc7456148e8d84a9e8da315f2520c7db3f9e10c8f84c71882ab63c734
6
+ metadata.gz: 5c6db32a927658696231f29604abafa19f11ccd1ec3b9085d687e4cb736cb64ad46fc3f364fb46c5f4c8c72d3d6485d3472baa0d0e1f9252ef6958b0e8d68aee
7
+ data.tar.gz: c737541bb924ecddbd202f07688d1963241893ed0c537d12714252c8aea69f101ebda69baaf7ac3dc13ab91c397b73d347cd64e799154b0d88b6283b286215de
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.4
1
+ 1.0.5
data/lib/html_to_plain_text.rb CHANGED
@@ -17,6 +17,7 @@ module HtmlToPlainText
17
17
  UL = "ul".freeze
18
18
  LI = "li".freeze
19
19
  A = "a".freeze
20
+ TABLE = "table".freeze
20
21
  NUMBERS = ["1", "a"].freeze
21
22
  ABSOLUTE_URL_PATTERN = /^[a-z]+:\/\/[a-z0-9]/i.freeze
22
23
  HTML_PATTERN = /[<&]/.freeze
@@ -30,6 +31,7 @@ module HtmlToPlainText
30
31
  EMPTY = "".freeze
31
32
  NEWLINE = "\n".freeze
32
33
  HREF = "href".freeze
34
+ TABLE_SEPARATOR = " | ".freeze
33
35
 
34
36
  # Helper instance method for converting HTML into plain text. This method simply calls HtmlToPlainText.plain_text.
35
37
  def plain_text(html)
@@ -59,7 +61,7 @@ module HtmlToPlainText
59
61
  end
60
62
 
61
63
  format_list_item(out, options) if parent.name == LI
62
- out << "| " if parent.name == TR
64
+ out << "| " if parent.name == TR && data_table?(parent.parent)
63
65
 
64
66
  parent.children.each do |node|
65
67
  if node.text? || node.cdata?
@@ -82,7 +84,7 @@ module HtmlToPlainText
82
84
  out << NEWLINE unless out.end_with?(NEWLINE)
83
85
  out << "-------------------------------\n"
84
86
  elsif node.name == TD || node.name == TH
85
- out << " | "
87
+ out << (data_table?(parent.parent) ? TABLE_SEPARATOR : SPACE)
86
88
  elsif node.name == A
87
89
  href = node[HREF]
88
90
  if href &&
@@ -147,5 +149,9 @@ module HtmlToPlainText
147
149
  out << "#{number}. "
148
150
  end
149
151
  end
152
+
153
+ def data_table?(table)
154
+ table.attributes['border'].to_s.to_i > 0
155
+ end
150
156
  end
151
157
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html_to_plain_text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Durand
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-19 00:00:00.000000000 Z
11
+ date: 2015-12-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri