remote_table 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,13 @@
1
+ 2.0.2 / 2012-06-20
2
+
3
+ * Secrets revealed
4
+
5
+ * Indeed it is OK to omit the file:// as long as you are using an absolute path to a local file... e.g., RemoteTable.new('/path/to/foo')
6
+
7
+ * Bug fixes
8
+
9
+ * Don't bork UTF-8 by incorrectly treating \xAD as a soft hyphen
10
+
1
11
  2.0.1 / 2012-05-16
2
12
 
3
13
  * Enhancements
data/lib/remote_table.rb CHANGED
@@ -147,6 +147,9 @@ class RemoteTable
147
147
  # @example Local
148
148
  # file:///Users/myuser/Desktop/holidays.csv
149
149
  #
150
+ # @example Local using an absolute path
151
+ # /Users/myuser/Desktop/holidays.csv
152
+ #
150
153
  # @example Remote
151
154
  # http://data.brighterplanet.com/countries.csv
152
155
  #
@@ -3,17 +3,30 @@ require 'unix_utils'
3
3
 
4
4
  class RemoteTable
5
5
  # Helper methods that act on plaintext files before they are parsed
6
- module Plaintext
7
- CONSIDERED_HARMFUL = [
8
- '\xef\xbb\xbf', # UTF-8 byte order mark
9
- '\xc2\xad', # soft hyphen, often inserted by MS Office (html: &shy;)
10
- '\xad' # any remaining soft hyphens (sometimes seen in windows-1252)
11
- ]
6
+ module Plaintext
7
+ class << self
8
+ # @private
9
+ # Code for the soft hyphen, often inserted by MS Office (html: &shy;)
10
+ def soft_hyphen(encoding)
11
+ case encoding
12
+ when /775/, /85[02578]/
13
+ '\xF0'
14
+ when /utf-?8/i
15
+ '\xc2\xad'
16
+ else # iso-8859-1, latin1, windows-1252, etc...
17
+ '\xad'
18
+ end
19
+ end
20
+ end
21
+
22
+ # UTF-8 byte order mark
23
+ UTF8_BOM = '\xef\xbb\xbf'
12
24
  EOL_TO_UNIX = 's/\r\n|\n|\r/\n/g'
13
25
 
14
26
  # Remove bytes that are both useless and harmful in the vast majority of cases.
15
27
  def delete_harmful!
16
- local_copy.in_place :perl, "s/#{CONSIDERED_HARMFUL.join('//g; s/')}//g"
28
+ harmful = [ Plaintext.soft_hyphen(internal_encoding), UTF8_BOM ]
29
+ local_copy.in_place :perl, "s/#{harmful.join('//g; s/')}//g"
17
30
  end
18
31
 
19
32
  # No matter what the file encoding is SUPPOSED to be, run it through the system iconv binary to make sure it's UTF-8
@@ -1,3 +1,3 @@
1
1
  class RemoteTable
2
- VERSION = "2.0.1"
2
+ VERSION = "2.0.2"
3
3
  end
@@ -0,0 +1,5 @@
1
+ id,name,city,country_name,iata_code,icao_code,latitude,longitude,altitude,timezone,daylight_savings
2
+ 6060,"Capitán Av. German Quiroga G. Airport","San Borja","Bolivia","SRJ","SLSB",-14.8592,-66.7375,633,-4,"U"
3
+ 6058,"Capitán Av. Selin Zeitun Lopez Airport","Riberalta","Bolivia","RIB","SLRI",-11,-66,462,-4,"U"
4
+ 6056,"Capitán de Av. Emilio Beltrán Airport","Guayaramerín","Bolivia","GYA","SLGY",-10.8206,-65.3456,557,-4,"U"
5
+ 6054,"Obando Airport","Puerto Inírida","Colombia","PDA","SKPD",3.85,-67.91,460,-5,"U"
@@ -44,7 +44,7 @@ describe RemoteTable do
44
44
 
45
45
  it "ignore UTF-8 byte order marks" do
46
46
  t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
47
- t.rows.first['name'].must_equal 'Tawleed'
47
+ t.rows.first['Name'].must_equal 'Tawleed'
48
48
  end
49
49
 
50
50
  # this will die with an error about libcurl if your curl doesn't support ssl
@@ -8,6 +8,17 @@ describe RemoteTable do
8
8
  t[5]["Requirements"].must_equal "Secure encryption of all data"
9
9
  end
10
10
 
11
+ it "doesn't screw up UTF-8" do
12
+ t = RemoteTable.new "file://#{File.expand_path('../support/airports.utf8.csv', __FILE__)}"
13
+ t[3]['city'].must_equal "Puerto Inírida"
14
+ end
15
+
16
+ it "likes paths as much as urls for local files" do
17
+ by_url = RemoteTable.new "file://#{File.expand_path('../support/airports.utf8.csv', __FILE__)}"
18
+ by_path = RemoteTable.new File.expand_path('../support/airports.utf8.csv', __FILE__)
19
+ by_path.rows.must_equal by_url.rows
20
+ end
21
+
11
22
  it "does its best to download urls without http://" do
12
23
  t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
13
24
  t[5]["Requirements"].must_equal "Secure encryption of all data"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-05-16 00:00:00.000000000 Z
13
+ date: 2012-06-20 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: activesupport
@@ -159,6 +159,7 @@ files:
159
159
  - remote_table.gemspec
160
160
  - test/fixtures/data.yml
161
161
  - test/helper.rb
162
+ - test/support/airports.utf8.csv
162
163
  - test/support/list-en1-semic-3.neooffice.binary.ods
163
164
  - test/support/list-en1-semic-3.neooffice.iso-8859-1.csv
164
165
  - test/support/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64
@@ -198,7 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
198
199
  version: '0'
199
200
  requirements: []
200
201
  rubyforge_project: remotetable
201
- rubygems_version: 1.8.21
202
+ rubygems_version: 1.8.24
202
203
  signing_key:
203
204
  specification_version: 3
204
205
  summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (comma
@@ -206,6 +207,7 @@ summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (com
206
207
  test_files:
207
208
  - test/fixtures/data.yml
208
209
  - test/helper.rb
210
+ - test/support/airports.utf8.csv
209
211
  - test/support/list-en1-semic-3.neooffice.binary.ods
210
212
  - test/support/list-en1-semic-3.neooffice.iso-8859-1.csv
211
213
  - test/support/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64