remote_table 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +10 -0
- data/lib/remote_table.rb +3 -0
- data/lib/remote_table/plaintext.rb +20 -7
- data/lib/remote_table/version.rb +1 -1
- data/test/support/airports.utf8.csv +5 -0
- data/test/test_old_syntax.rb +1 -1
- data/test/test_remote_table.rb +11 -0
- metadata +5 -3
data/CHANGELOG
CHANGED
|
@@ -1,3 +1,13 @@
|
|
|
1
|
+
2.0.2 / 2012-06-20
|
|
2
|
+
|
|
3
|
+
* Secrets revealed
|
|
4
|
+
|
|
5
|
+
* Indeed it is OK to omit the file:// as long as you are using an absolute path to a local file... e.g., RemoteTable.new('/path/to/foo')
|
|
6
|
+
|
|
7
|
+
* Bug fixes
|
|
8
|
+
|
|
9
|
+
* Don't bork UTF-8 by incorrectly treating \xAD as a soft hyphen
|
|
10
|
+
|
|
1
11
|
2.0.1 / 2012-05-16
|
|
2
12
|
|
|
3
13
|
* Enhancements
|
data/lib/remote_table.rb
CHANGED
|
@@ -147,6 +147,9 @@ class RemoteTable
|
|
|
147
147
|
# @example Local
|
|
148
148
|
# file:///Users/myuser/Desktop/holidays.csv
|
|
149
149
|
#
|
|
150
|
+
# @example Local using an absolute path
|
|
151
|
+
# /Users/myuser/Desktop/holidays.csv
|
|
152
|
+
#
|
|
150
153
|
# @example Remote
|
|
151
154
|
# http://data.brighterplanet.com/countries.csv
|
|
152
155
|
#
|
|
@@ -3,17 +3,30 @@ require 'unix_utils'
|
|
|
3
3
|
|
|
4
4
|
class RemoteTable
|
|
5
5
|
# Helper methods that act on plaintext files before they are parsed
|
|
6
|
-
module Plaintext
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
6
|
+
module Plaintext
|
|
7
|
+
class << self
|
|
8
|
+
# @private
|
|
9
|
+
# Code for the soft hyphen, often inserted by MS Office (html: &shy;)
|
|
10
|
+
def soft_hyphen(encoding)
|
|
11
|
+
case encoding
|
|
12
|
+
when /775/, /85[02578]/
|
|
13
|
+
'\xF0'
|
|
14
|
+
when /utf-?8/i
|
|
15
|
+
'\xc2\xad'
|
|
16
|
+
else # iso-8859-1, latin1, windows-1252, etc...
|
|
17
|
+
'\xad'
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
# UTF-8 byte order mark
|
|
23
|
+
UTF8_BOM = '\xef\xbb\xbf'
|
|
12
24
|
EOL_TO_UNIX = 's/\r\n|\n|\r/\n/g'
|
|
13
25
|
|
|
14
26
|
# Remove bytes that are both useless and harmful in the vast majority of cases.
|
|
15
27
|
def delete_harmful!
|
|
16
|
-
|
|
28
|
+
harmful = [ Plaintext.soft_hyphen(internal_encoding), UTF8_BOM ]
|
|
29
|
+
local_copy.in_place :perl, "s/#{harmful.join('//g; s/')}//g"
|
|
17
30
|
end
|
|
18
31
|
|
|
19
32
|
# No matter what the file encoding is SUPPOSED to be, run it through the system iconv binary to make sure it's UTF-8
|
data/lib/remote_table/version.rb
CHANGED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
id,name,city,country_name,iata_code,icao_code,latitude,longitude,altitude,timezone,daylight_savings
|
|
2
|
+
6060,"Capitán Av. German Quiroga G. Airport","San Borja","Bolivia","SRJ","SLSB",-14.8592,-66.7375,633,-4,"U"
|
|
3
|
+
6058,"Capitán Av. Selin Zeitun Lopez Airport","Riberalta","Bolivia","RIB","SLRI",-11,-66,462,-4,"U"
|
|
4
|
+
6056,"Capitán de Av. Emilio Beltrán Airport","Guayaramerín","Bolivia","GYA","SLGY",-10.8206,-65.3456,557,-4,"U"
|
|
5
|
+
6054,"Obando Airport","Puerto Inírida","Colombia","PDA","SKPD",3.85,-67.91,460,-5,"U"
|
data/test/test_old_syntax.rb
CHANGED
|
@@ -44,7 +44,7 @@ describe RemoteTable do
|
|
|
44
44
|
|
|
45
45
|
it "ignore UTF-8 byte order marks" do
|
|
46
46
|
t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
|
|
47
|
-
t.rows.first['
|
|
47
|
+
t.rows.first['Name'].must_equal 'Tawleed'
|
|
48
48
|
end
|
|
49
49
|
|
|
50
50
|
# this will die with an error about libcurl if your curl doesn't support ssl
|
data/test/test_remote_table.rb
CHANGED
|
@@ -8,6 +8,17 @@ describe RemoteTable do
|
|
|
8
8
|
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
|
9
9
|
end
|
|
10
10
|
|
|
11
|
+
it "doesn't screw up UTF-8" do
|
|
12
|
+
t = RemoteTable.new "file://#{File.expand_path('../support/airports.utf8.csv', __FILE__)}"
|
|
13
|
+
t[3]['city'].must_equal "Puerto Inírida"
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
it "likes paths as much as urls for local files" do
|
|
17
|
+
by_url = RemoteTable.new "file://#{File.expand_path('../support/airports.utf8.csv', __FILE__)}"
|
|
18
|
+
by_path = RemoteTable.new File.expand_path('../support/airports.utf8.csv', __FILE__)
|
|
19
|
+
by_path.rows.must_equal by_url.rows
|
|
20
|
+
end
|
|
21
|
+
|
|
11
22
|
it "does its best to download urls without http://" do
|
|
12
23
|
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
|
13
24
|
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: remote_table
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.0.1
|
|
4
|
+
version: 2.0.2
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -10,7 +10,7 @@ authors:
|
|
|
10
10
|
autorequire:
|
|
11
11
|
bindir: bin
|
|
12
12
|
cert_chain: []
|
|
13
|
-
date: 2012-
|
|
13
|
+
date: 2012-06-20 00:00:00.000000000 Z
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
16
16
|
name: activesupport
|
|
@@ -159,6 +159,7 @@ files:
|
|
|
159
159
|
- remote_table.gemspec
|
|
160
160
|
- test/fixtures/data.yml
|
|
161
161
|
- test/helper.rb
|
|
162
|
+
- test/support/airports.utf8.csv
|
|
162
163
|
- test/support/list-en1-semic-3.neooffice.binary.ods
|
|
163
164
|
- test/support/list-en1-semic-3.neooffice.iso-8859-1.csv
|
|
164
165
|
- test/support/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64
|
|
@@ -198,7 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
198
199
|
version: '0'
|
|
199
200
|
requirements: []
|
|
200
201
|
rubyforge_project: remotetable
|
|
201
|
-
rubygems_version: 1.8.
|
|
202
|
+
rubygems_version: 1.8.24
|
|
202
203
|
signing_key:
|
|
203
204
|
specification_version: 3
|
|
204
205
|
summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (comma
|
|
@@ -206,6 +207,7 @@ summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (com
|
|
|
206
207
|
test_files:
|
|
207
208
|
- test/fixtures/data.yml
|
|
208
209
|
- test/helper.rb
|
|
210
|
+
- test/support/airports.utf8.csv
|
|
209
211
|
- test/support/list-en1-semic-3.neooffice.binary.ods
|
|
210
212
|
- test/support/list-en1-semic-3.neooffice.iso-8859-1.csv
|
|
211
213
|
- test/support/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64
|