remote_table 0.2.22 → 0.2.23
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/remote_table/file/csv.rb +1 -0
- data/lib/remote_table/file/fixed_width.rb +1 -0
- data/lib/remote_table/file/html.rb +2 -1
- data/lib/remote_table/file.rb +9 -1
- data/remote_table.gemspec +2 -2
- data/test/remote_table_test.rb +9 -3
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.22
|
1
|
+
0.2.23
|
data/lib/remote_table/file/html.rb
CHANGED
@@ -3,6 +3,7 @@ class RemoteTable
|
|
3
3
|
def each_row(&block)
|
4
4
|
backup_file!
|
5
5
|
convert_file_to_utf8!
|
6
|
+
remove_useless_characters!
|
6
7
|
html_headers = (headers.is_a?(Array)) ? headers : nil
|
7
8
|
Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(row_xpath).each do |row|
|
8
9
|
values = row.xpath(column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
|
@@ -29,7 +30,7 @@ class RemoteTable
|
|
29
30
|
# should we be doing this in ruby?
|
30
31
|
def unescaped_html_without_soft_hyphens
|
31
32
|
str = CGI.unescapeHTML IO.read(path)
|
32
|
-
str.gsub! /­
|
33
|
+
str.gsub! /­/, ''
|
33
34
|
str
|
34
35
|
end
|
35
36
|
end
|
data/lib/remote_table/file.rb
CHANGED
@@ -63,8 +63,16 @@ class RemoteTable
|
|
63
63
|
FileUtils.mv "#{path}.tmp", path
|
64
64
|
end
|
65
65
|
|
66
|
+
USELESS_CHARACTERS = [
|
67
|
+
'\xef\xbb\xbf', # UTF-8 byte order mark
|
68
|
+
'\xc2\xad' # soft hyphen, often inserted by MS Office (html: ­)
|
69
|
+
]
|
70
|
+
def remove_useless_characters!
|
71
|
+
RemoteTable.backtick_with_reporting "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g' #{path} > #{path}.tmp"
|
72
|
+
FileUtils.mv "#{path}.tmp", path
|
73
|
+
end
|
74
|
+
|
66
75
|
def convert_file_to_utf8!
|
67
|
-
return if encoding == 'UTF-8' or encoding == 'UTF8'
|
68
76
|
RemoteTable.backtick_with_reporting "iconv -c -f #{encoding} -t UTF-8 #{path} > #{path}.tmp"
|
69
77
|
FileUtils.mv "#{path}.tmp", path
|
70
78
|
end
|
data/remote_table.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{remote_table}
|
8
|
-
s.version = "0.2.22"
|
8
|
+
s.version = "0.2.23"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-05-
|
12
|
+
s.date = %q{2010-05-21}
|
13
13
|
s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
data/test/remote_table_test.rb
CHANGED
@@ -128,6 +128,14 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
128
128
|
end
|
129
129
|
|
130
130
|
if ENV['ALL'] == 'true' or ENV['NEW'] == 'true'
|
131
|
+
end
|
132
|
+
|
133
|
+
if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
|
134
|
+
should "ignore UTF-8 byte order marks" do
|
135
|
+
t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
|
136
|
+
assert_equal 'Tawleed', t.rows.first['name']
|
137
|
+
end
|
138
|
+
|
131
139
|
should "be able to apply errata files" do
|
132
140
|
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
|
133
141
|
:encoding => 'windows-1252',
|
@@ -140,9 +148,7 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
140
148
|
assert_equal 'GRUMMAN', g1['Manufacturer']
|
141
149
|
assert_equal 'G159 Gulfstream I (TC4 Academe, VC4)', g1['Model']
|
142
150
|
end
|
143
|
-
|
144
|
-
|
145
|
-
if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
|
151
|
+
|
146
152
|
# this will die with an error about libcurl if your curl doesn't support ssl
|
147
153
|
should "connect using HTTPS if available" do
|
148
154
|
t = RemoteTable.new(:url => 'https://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 2
|
8
|
-
- 22
|
9
|
-
version: 0.2.22
|
8
|
+
- 23
|
9
|
+
version: 0.2.23
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-21 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|