remote_table 2.0.1 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,13 @@
1
+ 2.0.2 / 2012-06-20
2
+
3
+ * Secrets revealed
4
+
5
+ * Indeed it is OK to omit the file:// as long as you are using an absolute path to a local file... e.g., RemoteTable.new('/path/to/foo')
6
+
7
+ * Bug fixes
8
+
9
+ * Don't bork UTF-8 by incorrectly treating \xAD as a soft hyphen
10
+
1
11
  2.0.1 / 2012-05-16
2
12
 
3
13
  * Enhancements
data/lib/remote_table.rb CHANGED
@@ -147,6 +147,9 @@ class RemoteTable
147
147
  # @example Local
148
148
  # file:///Users/myuser/Desktop/holidays.csv
149
149
  #
150
+ # @example Local using an absolute path
151
+ # /Users/myuser/Desktop/holidays.csv
152
+ #
150
153
  # @example Remote
151
154
  # http://data.brighterplanet.com/countries.csv
152
155
  #
@@ -3,17 +3,30 @@ require 'unix_utils'
3
3
 
4
4
  class RemoteTable
5
5
  # Helper methods that act on plaintext files before they are parsed
6
- module Plaintext
7
- CONSIDERED_HARMFUL = [
8
- '\xef\xbb\xbf', # UTF-8 byte order mark
9
- '\xc2\xad', # soft hyphen, often inserted by MS Office (html: &shy;)
10
- '\xad' # any remaining soft hyphens (sometimes seen in windows-1252)
11
- ]
6
+ module Plaintext
7
+ class << self
8
+ # @private
9
+ # Code for the soft hyphen, often inserted by MS Office (html: &shy;)
10
+ def soft_hyphen(encoding)
11
+ case encoding
12
+ when /775/, /85[02578]/
13
+ '\xF0'
14
+ when /utf-?8/i
15
+ '\xc2\xad'
16
+ else # iso-8859-1, latin1, windows-1252, etc...
17
+ '\xad'
18
+ end
19
+ end
20
+ end
21
+
22
+ # UTF-8 byte order mark
23
+ UTF8_BOM = '\xef\xbb\xbf'
12
24
  EOL_TO_UNIX = 's/\r\n|\n|\r/\n/g'
13
25
 
14
26
  # Remove bytes that are both useless and harmful in the vast majority of cases.
15
27
  def delete_harmful!
16
- local_copy.in_place :perl, "s/#{CONSIDERED_HARMFUL.join('//g; s/')}//g"
28
+ harmful = [ Plaintext.soft_hyphen(internal_encoding), UTF8_BOM ]
29
+ local_copy.in_place :perl, "s/#{harmful.join('//g; s/')}//g"
17
30
  end
18
31
 
19
32
  # No matter what the file encoding is SUPPOSED to be, run it through the system iconv binary to make sure it's UTF-8
@@ -1,3 +1,3 @@
1
1
  class RemoteTable
2
- VERSION = "2.0.1"
2
+ VERSION = "2.0.2"
3
3
  end
@@ -0,0 +1,5 @@
1
+ id,name,city,country_name,iata_code,icao_code,latitude,longitude,altitude,timezone,daylight_savings
2
+ 6060,"Capitán Av. German Quiroga G. Airport","San Borja","Bolivia","SRJ","SLSB",-14.8592,-66.7375,633,-4,"U"
3
+ 6058,"Capitán Av. Selin Zeitun Lopez Airport","Riberalta","Bolivia","RIB","SLRI",-11,-66,462,-4,"U"
4
+ 6056,"Capitán de Av. Emilio Beltrán Airport","Guayaramerín","Bolivia","GYA","SLGY",-10.8206,-65.3456,557,-4,"U"
5
+ 6054,"Obando Airport","Puerto Inírida","Colombia","PDA","SKPD",3.85,-67.91,460,-5,"U"
@@ -44,7 +44,7 @@ describe RemoteTable do
44
44
 
45
45
  it "ignore UTF-8 byte order marks" do
46
46
  t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
47
- t.rows.first['name'].must_equal 'Tawleed'
47
+ t.rows.first['Name'].must_equal 'Tawleed'
48
48
  end
49
49
 
50
50
  # this will die with an error about libcurl if your curl doesn't support ssl
@@ -8,6 +8,17 @@ describe RemoteTable do
8
8
  t[5]["Requirements"].must_equal "Secure encryption of all data"
9
9
  end
10
10
 
11
+ it "doesn't screw up UTF-8" do
12
+ t = RemoteTable.new "file://#{File.expand_path('../support/airports.utf8.csv', __FILE__)}"
13
+ t[3]['city'].must_equal "Puerto Inírida"
14
+ end
15
+
16
+ it "likes paths as much as urls for local files" do
17
+ by_url = RemoteTable.new "file://#{File.expand_path('../support/airports.utf8.csv', __FILE__)}"
18
+ by_path = RemoteTable.new File.expand_path('../support/airports.utf8.csv', __FILE__)
19
+ by_path.rows.must_equal by_url.rows
20
+ end
21
+
11
22
  it "does its best to download urls without http://" do
12
23
  t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
13
24
  t[5]["Requirements"].must_equal "Secure encryption of all data"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-05-16 00:00:00.000000000 Z
13
+ date: 2012-06-20 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: activesupport
@@ -159,6 +159,7 @@ files:
159
159
  - remote_table.gemspec
160
160
  - test/fixtures/data.yml
161
161
  - test/helper.rb
162
+ - test/support/airports.utf8.csv
162
163
  - test/support/list-en1-semic-3.neooffice.binary.ods
163
164
  - test/support/list-en1-semic-3.neooffice.iso-8859-1.csv
164
165
  - test/support/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64
@@ -198,7 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
198
199
  version: '0'
199
200
  requirements: []
200
201
  rubyforge_project: remotetable
201
- rubygems_version: 1.8.21
202
+ rubygems_version: 1.8.24
202
203
  signing_key:
203
204
  specification_version: 3
204
205
  summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (comma
@@ -206,6 +207,7 @@ summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (com
206
207
  test_files:
207
208
  - test/fixtures/data.yml
208
209
  - test/helper.rb
210
+ - test/support/airports.utf8.csv
209
211
  - test/support/list-en1-semic-3.neooffice.binary.ods
210
212
  - test/support/list-en1-semic-3.neooffice.iso-8859-1.csv
211
213
  - test/support/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64