remote_table 2.0.1 → 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +10 -0
- data/lib/remote_table.rb +3 -0
- data/lib/remote_table/plaintext.rb +20 -7
- data/lib/remote_table/version.rb +1 -1
- data/test/support/airports.utf8.csv +5 -0
- data/test/test_old_syntax.rb +1 -1
- data/test/test_remote_table.rb +11 -0
- metadata +5 -3
data/CHANGELOG
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
2.0.2 / 2012-06-20
|
2
|
+
|
3
|
+
* Secrets revealed
|
4
|
+
|
5
|
+
* Indeed it is OK to omit the file:// as long as you are using an absolute path to a local file... e.g., RemoteTable.new('/path/to/foo')
|
6
|
+
|
7
|
+
* Bug fixes
|
8
|
+
|
9
|
+
* Don't bork UTF-8 by incorrectly treating \xAD as a soft hyphen
|
10
|
+
|
1
11
|
2.0.1 / 2012-05-16
|
2
12
|
|
3
13
|
* Enhancements
|
data/lib/remote_table.rb
CHANGED
@@ -147,6 +147,9 @@ class RemoteTable
|
|
147
147
|
# @example Local
|
148
148
|
# file:///Users/myuser/Desktop/holidays.csv
|
149
149
|
#
|
150
|
+
# @example Local using an absolute path
|
151
|
+
# /Users/myuser/Desktop/holidays.csv
|
152
|
+
#
|
150
153
|
# @example Remote
|
151
154
|
# http://data.brighterplanet.com/countries.csv
|
152
155
|
#
|
data/lib/remote_table/plaintext.rb
CHANGED
@@ -3,17 +3,30 @@ require 'unix_utils'
|
|
3
3
|
|
4
4
|
class RemoteTable
|
5
5
|
# Helper methods that act on plaintext files before they are parsed
|
6
|
-
module Plaintext
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
6
|
+
module Plaintext
|
7
|
+
class << self
|
8
|
+
# @private
|
9
|
+
# Code for the soft hyphen, often inserted by MS Office (html: &shy;)
|
10
|
+
def soft_hyphen(encoding)
|
11
|
+
case encoding
|
12
|
+
when /775/, /85[02578]/
|
13
|
+
'\xF0'
|
14
|
+
when /utf-?8/i
|
15
|
+
'\xc2\xad'
|
16
|
+
else # iso-8859-1, latin1, windows-1252, etc...
|
17
|
+
'\xad'
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# UTF-8 byte order mark
|
23
|
+
UTF8_BOM = '\xef\xbb\xbf'
|
12
24
|
EOL_TO_UNIX = 's/\r\n|\n|\r/\n/g'
|
13
25
|
|
14
26
|
# Remove bytes that are both useless and harmful in the vast majority of cases.
|
15
27
|
def delete_harmful!
|
16
|
-
|
28
|
+
harmful = [ Plaintext.soft_hyphen(internal_encoding), UTF8_BOM ]
|
29
|
+
local_copy.in_place :perl, "s/#{harmful.join('//g; s/')}//g"
|
17
30
|
end
|
18
31
|
|
19
32
|
# No matter what the file encoding is SUPPOSED to be, run it through the system iconv binary to make sure it's UTF-8
|
data/lib/remote_table/version.rb
CHANGED
@@ -0,0 +1,5 @@
|
|
1
|
+
id,name,city,country_name,iata_code,icao_code,latitude,longitude,altitude,timezone,daylight_savings
|
2
|
+
6060,"Capitán Av. German Quiroga G. Airport","San Borja","Bolivia","SRJ","SLSB",-14.8592,-66.7375,633,-4,"U"
|
3
|
+
6058,"Capitán Av. Selin Zeitun Lopez Airport","Riberalta","Bolivia","RIB","SLRI",-11,-66,462,-4,"U"
|
4
|
+
6056,"Capitán de Av. Emilio Beltrán Airport","Guayaramerín","Bolivia","GYA","SLGY",-10.8206,-65.3456,557,-4,"U"
|
5
|
+
6054,"Obando Airport","Puerto Inírida","Colombia","PDA","SKPD",3.85,-67.91,460,-5,"U"
|
data/test/test_old_syntax.rb
CHANGED
@@ -44,7 +44,7 @@ describe RemoteTable do
|
|
44
44
|
|
45
45
|
it "ignore UTF-8 byte order marks" do
|
46
46
|
t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
|
47
|
-
t.rows.first['
|
47
|
+
t.rows.first['Name'].must_equal 'Tawleed'
|
48
48
|
end
|
49
49
|
|
50
50
|
# this will die with an error about libcurl if your curl doesn't support ssl
|
data/test/test_remote_table.rb
CHANGED
@@ -8,6 +8,17 @@ describe RemoteTable do
|
|
8
8
|
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
9
9
|
end
|
10
10
|
|
11
|
+
it "doesn't screw up UTF-8" do
|
12
|
+
t = RemoteTable.new "file://#{File.expand_path('../support/airports.utf8.csv', __FILE__)}"
|
13
|
+
t[3]['city'].must_equal "Puerto Inírida"
|
14
|
+
end
|
15
|
+
|
16
|
+
it "likes paths as much as urls for local files" do
|
17
|
+
by_url = RemoteTable.new "file://#{File.expand_path('../support/airports.utf8.csv', __FILE__)}"
|
18
|
+
by_path = RemoteTable.new File.expand_path('../support/airports.utf8.csv', __FILE__)
|
19
|
+
by_path.rows.must_equal by_url.rows
|
20
|
+
end
|
21
|
+
|
11
22
|
it "does its best to download urls without http://" do
|
12
23
|
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
13
24
|
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-
|
13
|
+
date: 2012-06-20 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: activesupport
|
@@ -159,6 +159,7 @@ files:
|
|
159
159
|
- remote_table.gemspec
|
160
160
|
- test/fixtures/data.yml
|
161
161
|
- test/helper.rb
|
162
|
+
- test/support/airports.utf8.csv
|
162
163
|
- test/support/list-en1-semic-3.neooffice.binary.ods
|
163
164
|
- test/support/list-en1-semic-3.neooffice.iso-8859-1.csv
|
164
165
|
- test/support/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64
|
@@ -198,7 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
198
199
|
version: '0'
|
199
200
|
requirements: []
|
200
201
|
rubyforge_project: remotetable
|
201
|
-
rubygems_version: 1.8.
|
202
|
+
rubygems_version: 1.8.24
|
202
203
|
signing_key:
|
203
204
|
specification_version: 3
|
204
205
|
summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (comma
|
@@ -206,6 +207,7 @@ summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (com
|
|
206
207
|
test_files:
|
207
208
|
- test/fixtures/data.yml
|
208
209
|
- test/helper.rb
|
210
|
+
- test/support/airports.utf8.csv
|
209
211
|
- test/support/list-en1-semic-3.neooffice.binary.ods
|
210
212
|
- test/support/list-en1-semic-3.neooffice.iso-8859-1.csv
|
211
213
|
- test/support/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64
|