remote_table 0.2.22 → 0.2.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/remote_table/file/csv.rb +1 -0
- data/lib/remote_table/file/fixed_width.rb +1 -0
- data/lib/remote_table/file/html.rb +2 -1
- data/lib/remote_table/file.rb +9 -1
- data/remote_table.gemspec +2 -2
- data/test/remote_table_test.rb +9 -3
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.22
|
1
|
+
0.2.23
|
@@ -3,6 +3,7 @@ class RemoteTable
|
|
3
3
|
def each_row(&block)
|
4
4
|
backup_file!
|
5
5
|
convert_file_to_utf8!
|
6
|
+
remove_useless_characters!
|
6
7
|
html_headers = (headers.is_a?(Array)) ? headers : nil
|
7
8
|
Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(row_xpath).each do |row|
|
8
9
|
values = row.xpath(column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
|
@@ -29,7 +30,7 @@ class RemoteTable
|
|
29
30
|
# should we be doing this in ruby?
|
30
31
|
def unescaped_html_without_soft_hyphens
|
31
32
|
str = CGI.unescapeHTML IO.read(path)
|
32
|
-
str.gsub! /­
|
33
|
+
str.gsub! /­/, ''
|
33
34
|
str
|
34
35
|
end
|
35
36
|
end
|
data/lib/remote_table/file.rb
CHANGED
@@ -63,8 +63,16 @@ class RemoteTable
|
|
63
63
|
FileUtils.mv "#{path}.tmp", path
|
64
64
|
end
|
65
65
|
|
66
|
+
USELESS_CHARACTERS = [
|
67
|
+
'\xef\xbb\xbf', # UTF-8 byte order mark
|
68
|
+
'\xc2\xad' # soft hyphen, often inserted by MS Office (html: ­)
|
69
|
+
]
|
70
|
+
def remove_useless_characters!
|
71
|
+
RemoteTable.backtick_with_reporting "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g' #{path} > #{path}.tmp"
|
72
|
+
FileUtils.mv "#{path}.tmp", path
|
73
|
+
end
|
74
|
+
|
66
75
|
def convert_file_to_utf8!
|
67
|
-
return if encoding == 'UTF-8' or encoding == 'UTF8'
|
68
76
|
RemoteTable.backtick_with_reporting "iconv -c -f #{encoding} -t UTF-8 #{path} > #{path}.tmp"
|
69
77
|
FileUtils.mv "#{path}.tmp", path
|
70
78
|
end
|
data/remote_table.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{remote_table}
|
8
|
-
s.version = "0.2.22"
|
8
|
+
s.version = "0.2.23"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-05-
|
12
|
+
s.date = %q{2010-05-21}
|
13
13
|
s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
data/test/remote_table_test.rb
CHANGED
@@ -128,6 +128,14 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
128
128
|
end
|
129
129
|
|
130
130
|
if ENV['ALL'] == 'true' or ENV['NEW'] == 'true'
|
131
|
+
end
|
132
|
+
|
133
|
+
if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
|
134
|
+
should "ignore UTF-8 byte order marks" do
|
135
|
+
t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
|
136
|
+
assert_equal 'Tawleed', t.rows.first['name']
|
137
|
+
end
|
138
|
+
|
131
139
|
should "be able to apply errata files" do
|
132
140
|
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
|
133
141
|
:encoding => 'windows-1252',
|
@@ -140,9 +148,7 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
140
148
|
assert_equal 'GRUMMAN', g1['Manufacturer']
|
141
149
|
assert_equal 'G159 Gulfstream I (TC4 Academe, VC4)', g1['Model']
|
142
150
|
end
|
143
|
-
|
144
|
-
|
145
|
-
if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
|
151
|
+
|
146
152
|
# this will die with an error about libcurl if your curl doesn't support ssl
|
147
153
|
should "connect using HTTPS if available" do
|
148
154
|
t = RemoteTable.new(:url => 'https://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 2
|
8
|
-
- 22
|
9
|
-
version: 0.2.22
|
8
|
+
- 23
|
9
|
+
version: 0.2.23
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-21 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|