html_to_plain_text 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/VERSION +1 -1
  3. data/lib/html_to_plain_text.rb +8 -2
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d2fcc1c79047bd747cb209680ee94d2e4e8a0f41
4
- data.tar.gz: 3e31ccae62165b880f23447420ac370e5bd6eb19
3
+ metadata.gz: 2297205c53aeb650d7bacf92fd72e9d259fde896
4
+ data.tar.gz: dca3dc23d7916059863ded0b723d81df9384c818
5
5
  SHA512:
6
- metadata.gz: 642edbc9855a5a9a0a00a204e077f32e57efc0141843fcb2355c3fd86881ef2afa7672526dec45e48195c2fe8010e9751de9dd5e4bbb4e95437fe124bfbcd6ef
7
- data.tar.gz: 73ee95b2609f709c19187a5b1bd93025daa40af0d730fe69623d8a4be176cf8a5858526bc7456148e8d84a9e8da315f2520c7db3f9e10c8f84c71882ab63c734
6
+ metadata.gz: 5c6db32a927658696231f29604abafa19f11ccd1ec3b9085d687e4cb736cb64ad46fc3f364fb46c5f4c8c72d3d6485d3472baa0d0e1f9252ef6958b0e8d68aee
7
+ data.tar.gz: c737541bb924ecddbd202f07688d1963241893ed0c537d12714252c8aea69f101ebda69baaf7ac3dc13ab91c397b73d347cd64e799154b0d88b6283b286215de
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.4
1
+ 1.0.5
data/lib/html_to_plain_text.rb CHANGED
@@ -17,6 +17,7 @@ module HtmlToPlainText
17
17
  UL = "ul".freeze
18
18
  LI = "li".freeze
19
19
  A = "a".freeze
20
+ TABLE = "table".freeze
20
21
  NUMBERS = ["1", "a"].freeze
21
22
  ABSOLUTE_URL_PATTERN = /^[a-z]+:\/\/[a-z0-9]/i.freeze
22
23
  HTML_PATTERN = /[<&]/.freeze
@@ -30,6 +31,7 @@ module HtmlToPlainText
30
31
  EMPTY = "".freeze
31
32
  NEWLINE = "\n".freeze
32
33
  HREF = "href".freeze
34
+ TABLE_SEPARATOR = " | ".freeze
33
35
 
34
36
  # Helper instance method for converting HTML into plain text. This method simply calls HtmlToPlainText.plain_text.
35
37
  def plain_text(html)
@@ -59,7 +61,7 @@ module HtmlToPlainText
59
61
  end
60
62
 
61
63
  format_list_item(out, options) if parent.name == LI
62
- out << "| " if parent.name == TR
64
+ out << "| " if parent.name == TR && data_table?(parent.parent)
63
65
 
64
66
  parent.children.each do |node|
65
67
  if node.text? || node.cdata?
@@ -82,7 +84,7 @@ module HtmlToPlainText
82
84
  out << NEWLINE unless out.end_with?(NEWLINE)
83
85
  out << "-------------------------------\n"
84
86
  elsif node.name == TD || node.name == TH
85
- out << " | "
87
+ out << (data_table?(parent.parent) ? TABLE_SEPARATOR : SPACE)
86
88
  elsif node.name == A
87
89
  href = node[HREF]
88
90
  if href &&
@@ -147,5 +149,9 @@ module HtmlToPlainText
147
149
  out << "#{number}. "
148
150
  end
149
151
  end
152
+
153
+ def data_table?(table)
154
+ table.attributes['border'].to_s.to_i > 0
155
+ end
150
156
  end
151
157
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html_to_plain_text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Durand
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-19 00:00:00.000000000 Z
11
+ date: 2015-12-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri