html_to_plain_text 1.0.4 → 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/html_to_plain_text.rb +8 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2297205c53aeb650d7bacf92fd72e9d259fde896
|
4
|
+
data.tar.gz: dca3dc23d7916059863ded0b723d81df9384c818
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5c6db32a927658696231f29604abafa19f11ccd1ec3b9085d687e4cb736cb64ad46fc3f364fb46c5f4c8c72d3d6485d3472baa0d0e1f9252ef6958b0e8d68aee
|
7
|
+
data.tar.gz: c737541bb924ecddbd202f07688d1963241893ed0c537d12714252c8aea69f101ebda69baaf7ac3dc13ab91c397b73d347cd64e799154b0d88b6283b286215de
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.4
|
1
|
+
1.0.5
|
data/lib/html_to_plain_text.rb
CHANGED
@@ -17,6 +17,7 @@ module HtmlToPlainText
|
|
17
17
|
UL = "ul".freeze
|
18
18
|
LI = "li".freeze
|
19
19
|
A = "a".freeze
|
20
|
+
TABLE = "table".freeze
|
20
21
|
NUMBERS = ["1", "a"].freeze
|
21
22
|
ABSOLUTE_URL_PATTERN = /^[a-z]+:\/\/[a-z0-9]/i.freeze
|
22
23
|
HTML_PATTERN = /[<&]/.freeze
|
@@ -30,6 +31,7 @@ module HtmlToPlainText
|
|
30
31
|
EMPTY = "".freeze
|
31
32
|
NEWLINE = "\n".freeze
|
32
33
|
HREF = "href".freeze
|
34
|
+
TABLE_SEPARATOR = " | ".freeze
|
33
35
|
|
34
36
|
# Helper instance method for converting HTML into plain text. This method simply calls HtmlToPlainText.plain_text.
|
35
37
|
def plain_text(html)
|
@@ -59,7 +61,7 @@ module HtmlToPlainText
|
|
59
61
|
end
|
60
62
|
|
61
63
|
format_list_item(out, options) if parent.name == LI
|
62
|
-
out << "| " if parent.name == TR
|
64
|
+
out << "| " if parent.name == TR && data_table?(parent.parent)
|
63
65
|
|
64
66
|
parent.children.each do |node|
|
65
67
|
if node.text? || node.cdata?
|
@@ -82,7 +84,7 @@ module HtmlToPlainText
|
|
82
84
|
out << NEWLINE unless out.end_with?(NEWLINE)
|
83
85
|
out << "-------------------------------\n"
|
84
86
|
elsif node.name == TD || node.name == TH
|
85
|
-
out << " | "
|
87
|
+
out << (data_table?(parent.parent) ? TABLE_SEPARATOR : SPACE)
|
86
88
|
elsif node.name == A
|
87
89
|
href = node[HREF]
|
88
90
|
if href &&
|
@@ -147,5 +149,9 @@ module HtmlToPlainText
|
|
147
149
|
out << "#{number}. "
|
148
150
|
end
|
149
151
|
end
|
152
|
+
|
153
|
+
def data_table?(table)
|
154
|
+
table.attributes['border'].to_s.to_i > 0
|
155
|
+
end
|
150
156
|
end
|
151
157
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html_to_plain_text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.4
|
4
|
+
version: 1.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Durand
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|