remote_table 1.3.0 → 1.4.0
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
- data/.gitignore +2 -0
- data/CHANGELOG +19 -0
- data/Gemfile +7 -1
- data/README.markdown +440 -0
- data/Rakefile +6 -14
- data/lib/remote_table.rb +27 -38
- data/lib/remote_table/{properties.rb → config.rb} +39 -43
- data/lib/remote_table/format.rb +24 -27
- data/lib/remote_table/format/delimited.rb +17 -21
- data/lib/remote_table/format/fixed_width.rb +9 -9
- data/lib/remote_table/format/html.rb +0 -2
- data/lib/remote_table/format/mixins/processed_by_nokogiri.rb +13 -12
- data/lib/remote_table/format/mixins/processed_by_roo.rb +17 -13
- data/lib/remote_table/format/mixins/textual.rb +13 -13
- data/lib/remote_table/format/open_office.rb +3 -0
- data/lib/remote_table/format/xml.rb +0 -2
- data/lib/remote_table/format/yaml.rb +14 -0
- data/lib/remote_table/local_file.rb +69 -7
- data/lib/remote_table/transformer.rb +7 -4
- data/lib/remote_table/version.rb +1 -1
- data/remote_table.gemspec +5 -13
- data/test/fixtures/data.yml +4 -0
- data/test/helper.rb +8 -9
- data/test/test_big.rb +43 -53
- data/test/test_errata.rb +27 -25
- data/test/test_old_syntax.rb +193 -191
- data/test/test_old_transform.rb +12 -10
- data/test/test_remote_table.rb +57 -47
- metadata +48 -64
- data/.document +0 -5
- data/README.rdoc +0 -167
- data/lib/remote_table/utils.rb +0 -157
@@ -13,10 +13,13 @@ class RemoteTable
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
def legacy_transformer
|
16
|
-
return @legacy_transformer if @legacy_transformer
|
17
|
-
|
18
|
-
|
19
|
-
|
16
|
+
return @legacy_transformer[0] if @legacy_transformer.is_a?(::Array)
|
17
|
+
memo = if (transform_options = t.config.user_specified_options[:transform])
|
18
|
+
transform_options = transform_options.symbolize_keys
|
19
|
+
transform_options[:class].new transform_options.except(:class)
|
20
|
+
end
|
21
|
+
@legacy_transformer = [memo]
|
22
|
+
memo
|
20
23
|
end
|
21
24
|
end
|
22
25
|
end
|
data/lib/remote_table/version.rb
CHANGED
data/remote_table.gemspec
CHANGED
@@ -1,35 +1,27 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
-
|
3
|
-
require "remote_table/version"
|
2
|
+
require File.expand_path("../lib/remote_table/version", __FILE__)
|
4
3
|
|
5
4
|
Gem::Specification.new do |s|
|
6
5
|
s.name = "remote_table"
|
7
6
|
s.version = RemoteTable::VERSION
|
8
|
-
s.platform = Gem::Platform::RUBY
|
9
7
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
10
8
|
s.email = ["seamus@abshere.net"]
|
11
9
|
s.homepage = "https://github.com/seamusabshere/remote_table"
|
12
|
-
s.summary = "Open local or remote XLSX, XLS, ODS, CSV
|
10
|
+
s.summary = "Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV, TSV (tab separated), other delimited, fixed-width files."
|
13
11
|
s.description = %q{Gives you a standard way to parse various formats and treat them as an array of hashes.}
|
14
12
|
|
15
13
|
s.rubyforge_project = "remotetable"
|
16
14
|
|
15
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
17
16
|
s.files = `git ls-files`.split("\n")
|
18
17
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
19
|
-
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
18
|
s.require_paths = ["lib"]
|
21
19
|
|
22
20
|
s.add_runtime_dependency 'activesupport', '>=2.3.4'
|
23
21
|
s.add_runtime_dependency 'roo', '>=1.9'
|
24
|
-
s.add_runtime_dependency 'fixed_width-multibyte'
|
22
|
+
s.add_runtime_dependency 'fixed_width-multibyte', '>=0.2.3'
|
25
23
|
s.add_runtime_dependency 'i18n' # activesupport?
|
26
|
-
s.add_runtime_dependency '
|
27
|
-
s.add_runtime_dependency 'ensure-encoding'
|
24
|
+
s.add_runtime_dependency 'unix_utils', '>=0.0.8'
|
28
25
|
s.add_runtime_dependency 'fastercsv', '>=1.5.0'
|
29
26
|
s.add_runtime_dependency 'hash_digest'
|
30
|
-
|
31
|
-
s.add_development_dependency 'errata', '>=0.2.0'
|
32
|
-
s.add_development_dependency 'test-unit'
|
33
|
-
s.add_development_dependency 'shoulda'
|
34
|
-
s.add_development_dependency 'rake'
|
35
27
|
end
|
data/test/helper.rb
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
require 'bundler'
|
3
|
-
|
4
|
-
require '
|
5
|
-
require '
|
2
|
+
require 'bundler/setup'
|
3
|
+
require 'minitest/spec'
|
4
|
+
require 'minitest/autorun'
|
5
|
+
require 'minitest/reporters'
|
6
|
+
MiniTest::Unit.runner = MiniTest::SuiteRunner.new
|
7
|
+
MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
|
8
|
+
require 'remote_table'
|
6
9
|
|
7
|
-
|
8
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
9
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'remote_table'))
|
10
|
-
|
11
|
-
class Test::Unit::TestCase
|
10
|
+
class MiniTest::Spec
|
12
11
|
def setup
|
13
12
|
if RUBY_VERSION >= '1.9'
|
14
13
|
@old_default_internal = Encoding.default_internal
|
data/test/test_big.rb
CHANGED
@@ -1,61 +1,51 @@
|
|
1
1
|
require 'helper'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
3
|
+
describe RemoteTable do
|
4
|
+
describe "when dealing with large files" do
|
5
|
+
it "open an XLS inside a zip file" do
|
6
|
+
t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
|
7
|
+
t.rows.first['Manufacturer'].must_equal 'ACURA'
|
8
|
+
t.rows.first['carline name'].must_equal 'NSX'
|
9
|
+
t.rows.last['Manufacturer'].must_equal 'VOLVO'
|
10
|
+
t.rows.last['carline name'].must_equal 'V70 XC AWD'
|
11
|
+
end
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
it "not have indifferent string/symbol hash access" do
|
14
|
+
t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
|
15
|
+
t.rows.first['Manufacturer'].must_equal 'ACURA'
|
16
|
+
t.rows.first[:Manufacturer].must_equal nil
|
17
|
+
end
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
19
|
+
it "open a CSV inside a zip file" do
|
20
|
+
t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv')
|
21
|
+
t.rows.length.must_equal 806
|
22
|
+
t.rows.first['Manufacturer'].must_equal 'ACURA'
|
23
|
+
t.rows.first['carline name'].must_equal 'NSX'
|
24
|
+
t.rows.last['Manufacturer'].must_equal 'TOYOTA'
|
25
|
+
t.rows.last['carline name'].must_equal 'RAV4 SOFT TOP 4WD'
|
26
|
+
end
|
25
27
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
28
|
+
it "open a fixed-width file with an inline schema inside a zip file" do
|
29
|
+
t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
|
30
|
+
:filename => 'Gd6-dsc.txt',
|
31
|
+
:format => :fixed_width,
|
32
|
+
:crop => 21..26, # inclusive
|
33
|
+
:cut => '2-',
|
34
|
+
:select => lambda { |row| /\A[A-Z]/.match row['code'] },
|
35
|
+
:schema => [[ 'code', 2, { :type => :string } ],
|
36
|
+
[ 'spacer', 2 ],
|
37
|
+
[ 'name', 52, { :type => :string } ]])
|
38
|
+
t.rows.first['name'].must_equal 'regular grade gasoline (octane number of 87)'
|
39
|
+
t.rows.first['code'].must_equal 'R'
|
40
|
+
t.rows.last['name'].must_equal 'electricity'
|
41
|
+
t.rows.last['code'].must_equal 'El'
|
42
|
+
end
|
41
43
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
44
|
+
it "send form data, follow redirects and use a filename glob" do
|
45
|
+
url = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
|
46
|
+
form_data = 'UserTableName=T_100_Segment__All_Carriers&DBShortName=Air_Carriers&RawDataTable=T_T100_SEGMENT_ALL_CARRIER&sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D1+AND+YEAR%3D2008&varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&grouplist=&suml=&sumRegion=&filter1=title%3D&filter2=title%3D&geo=All%A0&time=January&timename=Month&GEOGRAPHY=All&XYEAR=2008&FREQUENCY=1&AllVars=All&VarName=DEPARTURES_SCHEDULED&VarDesc=DepScheduled&VarType=Num&VarName=DEPARTURES_PERFORMED&VarDesc=DepPerformed&VarType=Num&VarName=PAYLOAD&VarDesc=Payload&VarType=Num&VarName=SEATS&VarDesc=Seats&VarType=Num&VarName=PASSENGERS&VarDesc=Passengers&VarType=Num&VarName=FREIGHT&VarDesc=Freight&VarType=Num&VarName=MAIL&VarDesc=Mail&VarType=Num&VarNa
me=DISTANCE&VarDesc=Distance&VarType=Num&VarName=RAMP_TO_RAMP&VarDesc=RampToRamp&VarType=Num&VarName=AIR_TIME&VarDesc=AirTime&VarType=Num&VarName=UNIQUE_CARRIER&VarDesc=UniqueCarrier&VarType=Char&VarName=AIRLINE_ID&VarDesc=AirlineID&VarType=Num&VarName=UNIQUE_CARRIER_NAME&VarDesc=UniqueCarrierName&VarType=Char&VarName=UNIQUE_CARRIER_ENTITY&VarDesc=UniqCarrierEntity&VarType=Char&VarName=REGION&VarDesc=CarrierRegion&VarType=Char&VarName=CARRIER&VarDesc=Carrier&VarType=Char&VarName=CARRIER_NAME&VarDesc=CarrierName&VarType=Char&VarName=CARRIER_GROUP&VarDesc=CarrierGroup&VarType=Num&VarName=CARRIER_GROUP_NEW&VarDesc=CarrierGroupNew&VarType=Num&VarName=ORIGIN&VarDesc=Origin&VarType=Char&VarName=ORIGIN_CITY_NAME&VarDesc=OriginCityName&VarType=Char&VarName=ORIGIN_STATE_ABR&VarDesc=OriginState&VarType=Char&VarName=ORIGIN_STATE_FIPS&VarDesc=OriginStateFips&VarType=Char&VarName=ORIGIN_STATE_NM&VarDesc=OriginStateName&VarType=Char&VarName=ORIGIN_COUNTRY&VarDesc=OriginCountry&VarType=Char&VarName=ORIGIN_COUNTRY_NAME&VarDesc=OriginCountryName&VarType=Char&VarName=ORIGIN_WAC&VarDesc=OriginWac&VarType=Num&VarName=DEST&VarDesc=Dest&VarType=Char&VarName=DEST_CITY_NAME&VarDesc=DestCityName&VarType=Char&VarName=DEST_STATE_ABR&VarDesc=DestState&VarType=Char&VarName=DEST_STATE_FIPS&VarDesc=DestStateFips&VarType=Char&VarName=DEST_STATE_NM&VarDesc=DestStateName&VarType=Char&VarName=DEST_COUNTRY&VarDesc=DestCountry&VarType=Char&VarName=DEST_COUNTRY_NAME&VarDesc=DestCountryName&VarType=Char&VarName=DEST_WAC&VarDesc=DestWac&VarType=Num&VarName=AIRCRAFT_GROUP&VarDesc=AircraftGroup&VarType=Num&VarName=AIRCRAFT_TYPE&VarDesc=AircraftType&VarType=Char&VarName=AIRCRAFT_CONFIG&VarDesc=AircraftConfig&VarType=Num&VarName=YEAR&VarDesc=Year&VarType=Num&VarName=QUARTER&VarDesc=Quarter&VarType=Num&VarName=MONTH&VarDesc=Month&VarType=Num&VarName=DISTANCE_GROUP&VarDesc=DistanceGroup&VarType=Num&VarName=CLASS&VarDesc=Class&VarType=Char&VarName=DATA_SOURCE&VarDesc=DataSource&VarType=Char'
|
47
|
+
t = RemoteTable.new :url => url, :form_data => form_data, :compression => :zip, :glob => '/*.csv'
|
48
|
+
t.rows.first['DEST_COUNTRY_NAME'].must_equal 'United States'
|
49
|
+
end
|
47
50
|
end
|
48
|
-
|
49
|
-
# should "provide a row_hash on demand" do
|
50
|
-
# t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
|
51
|
-
# :filename => 'Gd6-dsc.txt',
|
52
|
-
# :format => :fixed_width,
|
53
|
-
# :crop => 21..26, # inclusive
|
54
|
-
# :cut => '2-',
|
55
|
-
# :select => lambda { |row| /\A[A-Z]/.match row['code'] },
|
56
|
-
# :schema => [[ 'code', 2, { :type => :string } ],
|
57
|
-
# [ 'spacer', 2 ],
|
58
|
-
# [ 'name', 52, { :type => :string } ]])
|
59
|
-
# assert_equal 'a8a5d7f17b56772723c657eb62b0f238', t.rows.first['row_hash']
|
60
|
-
# end
|
61
51
|
end
|
data/test/test_errata.rb
CHANGED
@@ -30,30 +30,32 @@ class AircraftGuru
|
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
33
|
+
describe RemoteTable do
|
34
|
+
describe "when using an errata file" do
|
35
|
+
it "be able to apply Errata instances directly" do
|
36
|
+
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
|
37
|
+
:encoding => 'windows-1252',
|
38
|
+
:row_xpath => '//table[2]//table[1]//tr[3]//tr',
|
39
|
+
:column_xpath => 'td',
|
40
|
+
:errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
|
41
|
+
:responder => AircraftGuru.new)
|
42
|
+
g1 = t.rows.detect { |row| row['Model'] =~ /Gulfstream I/ }
|
43
|
+
g1.wont_be_nil
|
44
|
+
g1['Manufacturer'].must_equal 'GULFSTREAM AEROSPACE'
|
45
|
+
g1['Model'].must_equal 'Gulfstream I'
|
46
|
+
end
|
47
|
+
|
48
|
+
it "be able to apply erratas given a hash of options" do
|
49
|
+
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
|
50
|
+
:encoding => 'windows-1252',
|
51
|
+
:row_xpath => '//table[2]//table[1]//tr[3]//tr',
|
52
|
+
:column_xpath => 'td',
|
53
|
+
:errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
|
54
|
+
:responder => AircraftGuru.new }
|
55
|
+
g1 = t.rows.detect { |row| row['Model'] =~ /Gulfstream I/ }
|
56
|
+
g1.wont_be_nil
|
57
|
+
g1['Manufacturer'].must_equal 'GULFSTREAM AEROSPACE'
|
58
|
+
g1['Model'].must_equal 'Gulfstream I'
|
59
|
+
end
|
58
60
|
end
|
59
61
|
end
|
data/test/test_old_syntax.rb
CHANGED
@@ -13,211 +13,213 @@ $test2_rows = [
|
|
13
13
|
$test2_rows_with_blanks.freeze
|
14
14
|
$test2_rows.freeze
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
16
|
+
describe RemoteTable do
|
17
|
+
describe "when using old-style syntax" do
|
18
|
+
it "open an XLSX like an array (numbered columns)" do
|
19
|
+
t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => false)
|
20
|
+
t.rows[0][0].must_equal "Requirements"
|
21
|
+
t.rows[5][0].must_equal "Software-As-A-Service"
|
22
|
+
end
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
it "open an XLSX with custom headers" do
|
25
|
+
t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => %w{foo bar baz})
|
26
|
+
t.rows[0]['foo'].must_equal "Requirements"
|
27
|
+
t.rows[5]['foo'].must_equal "Software-As-A-Service"
|
28
|
+
end
|
28
29
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
end
|
33
|
-
|
34
|
-
should "work on filenames with spaces, using globbing" do
|
35
|
-
t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
|
36
|
-
assert_equal 'ASTON MARTIN', t.rows.first['MFR']
|
37
|
-
end
|
38
|
-
|
39
|
-
should "work on filenames with spaces" do
|
40
|
-
t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
|
41
|
-
assert_equal 'ASTON MARTIN', t.rows.first['MFR']
|
42
|
-
end
|
43
|
-
|
44
|
-
should "ignore UTF-8 byte order marks" do
|
45
|
-
t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
|
46
|
-
assert_equal 'Tawleed', t.rows.first['name']
|
47
|
-
end
|
48
|
-
|
49
|
-
# this will die with an error about libcurl if your curl doesn't support ssl
|
50
|
-
should "connect using HTTPS if available" do
|
51
|
-
t = RemoteTable.new(:url => 'https://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
|
52
|
-
assert_equal 'Gulf Coast', t.rows.first['PAD district name']
|
53
|
-
assert_equal 'AL', t.rows.first['State']
|
54
|
-
assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
|
55
|
-
assert_equal 'WY', t.rows.last['State']
|
56
|
-
end
|
57
|
-
|
58
|
-
should "read an HTML table made with frontpage" do
|
59
|
-
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-E.htm",
|
60
|
-
:encoding => 'US-ASCII',
|
61
|
-
:row_xpath => '//table/tr[2]/td/table/tr',
|
62
|
-
:column_xpath => 'td'
|
63
|
-
assert_equal 'E110', t.rows.first['Designator']
|
64
|
-
assert_equal 'EMBRAER', t.rows.first['Manufacturer']
|
65
|
-
assert_equal 'EZKC', t.rows.last['Designator']
|
66
|
-
assert_equal 'EZ King Cobra', t.rows.last['Model']
|
67
|
-
end
|
68
|
-
|
69
|
-
should "hash rows without paying attention to order" do
|
70
|
-
x = ActiveSupport::OrderedHash.new
|
71
|
-
x[:a] = 1
|
72
|
-
x[:b] = 2
|
73
|
-
|
74
|
-
y = ActiveSupport::OrderedHash.new
|
75
|
-
y[:b] = 2
|
76
|
-
y[:a] = 1
|
77
|
-
|
78
|
-
assert_not_equal Marshal.dump(x), Marshal.dump(y)
|
79
|
-
assert_equal RemoteTable::Transform.row_hash(x), RemoteTable::Transform.row_hash(y)
|
80
|
-
end
|
81
|
-
|
82
|
-
should "open a Google Docs url (as a CSV)" do
|
83
|
-
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
|
84
|
-
assert_equal 'Gulf Coast', t.rows.first['PAD district name']
|
85
|
-
assert_equal 'AL', t.rows.first['State']
|
86
|
-
assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
|
87
|
-
assert_equal 'WY', t.rows.last['State']
|
88
|
-
end
|
89
|
-
|
90
|
-
should "open a Google Docs url (as a CSV, with sheet options)" do
|
91
|
-
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA&single=true&gid=0')
|
92
|
-
assert_equal 'Gulf Coast', t.rows.first['PAD district name']
|
93
|
-
assert_equal 'AL', t.rows.first['State']
|
94
|
-
assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
|
95
|
-
assert_equal 'WY', t.rows.last['State']
|
96
|
-
end
|
97
|
-
|
98
|
-
should "open a Google Docs url as a CSV without headers" do
|
99
|
-
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
|
100
|
-
assert_equal 'AL', t.rows.first[0]
|
101
|
-
assert_equal 'Gulf Coast', t.rows.first[4]
|
102
|
-
assert_equal 'WY', t.rows.last[0]
|
103
|
-
assert_equal 'Rocky Mountain', t.rows.last[4]
|
104
|
-
end
|
105
|
-
|
106
|
-
should "take the last of values if the header is duplicated" do
|
107
|
-
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg')
|
108
|
-
assert_equal '2', t.rows.first['dup_header']
|
109
|
-
end
|
110
|
-
|
111
|
-
should "return an Array when instructed not to use headers" do
|
112
|
-
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
|
113
|
-
t.rows.each do |row|
|
114
|
-
assert row.is_a?(::Array)
|
30
|
+
it "open an XLSX" do
|
31
|
+
t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
|
32
|
+
t.rows[5]["Requirements"].must_equal "Secure encryption of all data"
|
115
33
|
end
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
34
|
+
|
35
|
+
it "work on filenames with spaces, using globbing" do
|
36
|
+
t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
|
37
|
+
t.rows.first['MFR'].must_equal 'ASTON MARTIN'
|
38
|
+
end
|
39
|
+
|
40
|
+
it "work on filenames with spaces" do
|
41
|
+
t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
|
42
|
+
t.rows.first['MFR'].must_equal 'ASTON MARTIN'
|
43
|
+
end
|
44
|
+
|
45
|
+
it "ignore UTF-8 byte order marks" do
|
46
|
+
t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
|
47
|
+
t.rows.first['name'].must_equal 'Tawleed'
|
48
|
+
end
|
49
|
+
|
50
|
+
# this will die with an error about libcurl if your curl doesn't support ssl
|
51
|
+
it "connect using HTTPS if available" do
|
52
|
+
t = RemoteTable.new(:url => 'https://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
|
53
|
+
t.rows.first['PAD district name'].must_equal 'Gulf Coast'
|
54
|
+
t.rows.first['State'].must_equal 'AL'
|
55
|
+
t.rows.last['PAD district name'].must_equal 'Rocky Mountain'
|
56
|
+
t.rows.last['State'].must_equal 'WY'
|
57
|
+
end
|
58
|
+
|
59
|
+
it "read an HTML table made with frontpage" do
|
60
|
+
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-E.htm",
|
61
|
+
:encoding => 'US-ASCII',
|
62
|
+
:row_xpath => '//table[2]//table[1]//tr[3]//tr',
|
63
|
+
:column_xpath => 'td'
|
64
|
+
t.rows.first['Designator'].must_equal 'E110'
|
65
|
+
t.rows.first['Manufacturer'].must_equal 'EMBRAER'
|
66
|
+
t.rows.last['Designator'].must_equal 'EZKC'
|
67
|
+
t.rows.last['Model'].must_equal 'EZ King Cobra'
|
68
|
+
end
|
69
|
+
|
70
|
+
it "hash rows without paying attention to order" do
|
71
|
+
x = ActiveSupport::OrderedHash.new
|
72
|
+
x[:a] = 1
|
73
|
+
x[:b] = 2
|
74
|
+
|
75
|
+
y = ActiveSupport::OrderedHash.new
|
76
|
+
y[:b] = 2
|
77
|
+
y[:a] = 1
|
78
|
+
|
79
|
+
Marshal.dump(x).wont_equal Marshal.dump(y)
|
80
|
+
RemoteTable::Transform.row_hash(y).must_equal RemoteTable::Transform.row_hash(x)
|
81
|
+
end
|
82
|
+
|
83
|
+
it "open a Google Docs url (as a CSV)" do
|
84
|
+
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
|
85
|
+
t.rows.first['PAD district name'].must_equal 'Gulf Coast'
|
86
|
+
t.rows.first['State'].must_equal 'AL'
|
87
|
+
t.rows.last['PAD district name'].must_equal 'Rocky Mountain'
|
88
|
+
t.rows.last['State'].must_equal 'WY'
|
89
|
+
end
|
90
|
+
|
91
|
+
it "open a Google Docs url (as a CSV, with sheet options)" do
|
92
|
+
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA&single=true&gid=0')
|
93
|
+
t.rows.first['PAD district name'].must_equal 'Gulf Coast'
|
94
|
+
t.rows.first['State'].must_equal 'AL'
|
95
|
+
t.rows.last['PAD district name'].must_equal 'Rocky Mountain'
|
96
|
+
t.rows.last['State'].must_equal 'WY'
|
97
|
+
end
|
98
|
+
|
99
|
+
it "open a Google Docs url as a CSV without headers" do
|
100
|
+
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
|
101
|
+
t.rows.first[0].must_equal 'AL'
|
102
|
+
t.rows.first[4].must_equal 'Gulf Coast'
|
103
|
+
t.rows.last[0].must_equal 'WY'
|
104
|
+
t.rows.last[4].must_equal 'Rocky Mountain'
|
105
|
+
end
|
106
|
+
|
107
|
+
it "take the last of values if the header is duplicated" do
|
108
|
+
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg')
|
109
|
+
t.rows.first['dup_header'].must_equal '2'
|
110
|
+
end
|
111
|
+
|
112
|
+
it "return an Array when instructed not to use headers" do
|
113
|
+
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
|
114
|
+
t.rows.each do |row|
|
115
|
+
row.must_be_kind_of ::Array
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
%w{ csv ods xls }.each do |format|
|
120
|
+
it "read #{format}" do
|
121
|
+
t = RemoteTable.new(:url => "http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}")
|
122
|
+
# no blank headers
|
123
|
+
t.rows.all? { |row| row.keys.all?(&:present?) }.must_equal true
|
124
|
+
# correct values
|
125
|
+
t.rows.each_with_index do |row, index|
|
126
|
+
row.except('row_hash').must_equal $test2_rows[index]
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
it "read #{format}, keeping blank rows" do
|
131
|
+
t = RemoteTable.new(:url => "http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}", :keep_blank_rows => true)
|
132
|
+
# no blank headers
|
133
|
+
t.rows.all? { |row| row.keys.all?(&:present?) }.must_equal true
|
134
|
+
# correct values
|
135
|
+
t.rows.each_with_index do |row, index|
|
136
|
+
row.except('row_hash').must_equal $test2_rows_with_blanks[index]
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
it "read fixed width correctly" do
|
142
|
+
t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
|
143
|
+
:format => :fixed_width,
|
144
|
+
:skip => 1,
|
145
|
+
:schema => [[ 'header4', 10, { :type => :string } ],
|
146
|
+
[ 'spacer', 1 ],
|
147
|
+
[ 'header5', 10, { :type => :string } ],
|
148
|
+
[ 'spacer', 12 ],
|
149
|
+
[ 'header6', 10, { :type => :string } ]])
|
150
|
+
|
121
151
|
# no blank headers
|
122
|
-
|
152
|
+
t.rows.all? { |row| row.keys.all?(&:present?) }.must_equal true
|
123
153
|
# correct values
|
124
154
|
t.rows.each_with_index do |row, index|
|
125
|
-
|
155
|
+
$test2_rows[index].must_equal row.except('row_hash')
|
126
156
|
end
|
127
157
|
end
|
128
|
-
|
129
|
-
|
130
|
-
t = RemoteTable.new(:url =>
|
158
|
+
|
159
|
+
it "read fixed width correctly, keeping blank rows" do
|
160
|
+
t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
|
161
|
+
:format => :fixed_width,
|
162
|
+
:keep_blank_rows => true,
|
163
|
+
:skip => 1,
|
164
|
+
:schema => [[ 'header4', 10, { :type => :string } ],
|
165
|
+
[ 'spacer', 1 ],
|
166
|
+
[ 'header5', 10, { :type => :string } ],
|
167
|
+
[ 'spacer', 12 ],
|
168
|
+
[ 'header6', 10, { :type => :string } ]])
|
169
|
+
|
131
170
|
# no blank headers
|
132
|
-
|
171
|
+
t.rows.all? { |row| row.keys.all?(&:present?) }.must_equal true
|
133
172
|
# correct values
|
134
173
|
t.rows.each_with_index do |row, index|
|
135
|
-
|
174
|
+
$test2_rows_with_blanks[index].must_equal row.except('row_hash')
|
136
175
|
end
|
137
176
|
end
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
[ '
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
177
|
+
|
178
|
+
it "have the same row hash across formats" do
|
179
|
+
csv = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv')
|
180
|
+
ods = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods')
|
181
|
+
xls = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls')
|
182
|
+
fixed_width = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
|
183
|
+
:format => :fixed_width,
|
184
|
+
:skip => 1,
|
185
|
+
:schema => [[ 'header1', 10, { :type => :string } ],
|
186
|
+
[ 'spacer', 1 ],
|
187
|
+
[ 'header2', 10, { :type => :string } ],
|
188
|
+
[ 'spacer', 12 ],
|
189
|
+
[ 'header3', 10, { :type => :string } ]])
|
190
|
+
|
191
|
+
csv2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv')
|
192
|
+
ods2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods')
|
193
|
+
xls2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls')
|
194
|
+
fixed_width2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
|
195
|
+
:format => :fixed_width,
|
196
|
+
:skip => 1,
|
197
|
+
:schema => [[ 'spacer', 11 ],
|
198
|
+
[ 'header2', 10, { :type => :string } ],
|
199
|
+
[ 'spacer', 1 ],
|
200
|
+
[ 'header3', 10, { :type => :string } ],
|
201
|
+
[ 'spacer', 1 ],
|
202
|
+
[ 'header1', 10, { :type => :string } ]])
|
203
|
+
|
204
|
+
|
205
|
+
reference = csv.rows[0]['row_hash']
|
206
|
+
|
207
|
+
# same row hashes
|
208
|
+
ods.rows[0]['row_hash'].must_equal reference
|
209
|
+
xls.rows[0]['row_hash'].must_equal reference
|
210
|
+
fixed_width.rows[0]['row_hash'].must_equal reference
|
211
|
+
# same row hashes with different order
|
212
|
+
csv2.rows[0]['row_hash'].must_equal reference
|
213
|
+
ods2.rows[0]['row_hash'].must_equal reference
|
214
|
+
xls2.rows[0]['row_hash'].must_equal reference
|
215
|
+
fixed_width2.rows[0]['row_hash'].must_equal reference
|
155
216
|
end
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
:skip => 1,
|
163
|
-
:schema => [[ 'header4', 10, { :type => :string } ],
|
164
|
-
[ 'spacer', 1 ],
|
165
|
-
[ 'header5', 10, { :type => :string } ],
|
166
|
-
[ 'spacer', 12 ],
|
167
|
-
[ 'header6', 10, { :type => :string } ]])
|
168
|
-
|
169
|
-
# no blank headers
|
170
|
-
assert t.rows.all? { |row| row.keys.all?(&:present?) }
|
171
|
-
# correct values
|
172
|
-
t.rows.each_with_index do |row, index|
|
173
|
-
assert_equal row.except('row_hash'), $test2_rows_with_blanks[index]
|
217
|
+
|
218
|
+
it "open an ODS" do
|
219
|
+
t = RemoteTable.new(:url => 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true)
|
220
|
+
|
221
|
+
t.rows[5]['name'].must_equal 'Central Africa'
|
222
|
+
t.rows[5]['MAP DATA population (millions) 2002'].to_i.must_equal 99
|
174
223
|
end
|
175
224
|
end
|
176
|
-
|
177
|
-
should "have the same row hash across formats" do
|
178
|
-
csv = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv')
|
179
|
-
ods = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods')
|
180
|
-
xls = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls')
|
181
|
-
fixed_width = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
|
182
|
-
:format => :fixed_width,
|
183
|
-
:skip => 1,
|
184
|
-
:schema => [[ 'header1', 10, { :type => :string } ],
|
185
|
-
[ 'spacer', 1 ],
|
186
|
-
[ 'header2', 10, { :type => :string } ],
|
187
|
-
[ 'spacer', 12 ],
|
188
|
-
[ 'header3', 10, { :type => :string } ]])
|
189
|
-
|
190
|
-
csv2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv')
|
191
|
-
ods2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods')
|
192
|
-
xls2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls')
|
193
|
-
fixed_width2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
|
194
|
-
:format => :fixed_width,
|
195
|
-
:skip => 1,
|
196
|
-
:schema => [[ 'spacer', 11 ],
|
197
|
-
[ 'header2', 10, { :type => :string } ],
|
198
|
-
[ 'spacer', 1 ],
|
199
|
-
[ 'header3', 10, { :type => :string } ],
|
200
|
-
[ 'spacer', 1 ],
|
201
|
-
[ 'header1', 10, { :type => :string } ]])
|
202
|
-
|
203
|
-
|
204
|
-
reference = csv.rows[0]['row_hash']
|
205
|
-
|
206
|
-
# same row hashes
|
207
|
-
assert_equal reference, ods.rows[0]['row_hash']
|
208
|
-
assert_equal reference, xls.rows[0]['row_hash']
|
209
|
-
assert_equal reference, fixed_width.rows[0]['row_hash']
|
210
|
-
# same row hashes with different order
|
211
|
-
assert_equal reference, csv2.rows[0]['row_hash']
|
212
|
-
assert_equal reference, ods2.rows[0]['row_hash']
|
213
|
-
assert_equal reference, xls2.rows[0]['row_hash']
|
214
|
-
assert_equal reference, fixed_width2.rows[0]['row_hash']
|
215
|
-
end
|
216
|
-
|
217
|
-
should "open an ODS" do
|
218
|
-
t = RemoteTable.new(:url => 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true)
|
219
|
-
|
220
|
-
assert_equal 'Central Africa', t.rows[5]['name']
|
221
|
-
assert_equal 99, t.rows[5]['MAP DATA population (millions) 2002'].to_i
|
222
|
-
end
|
223
225
|
end
|