remote_table 0.2.32 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +5 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +65 -0
- data/LICENSE +1 -1
- data/README.rdoc +21 -7
- data/Rakefile +12 -61
- data/lib/remote_table/cleaner.rb +19 -0
- data/lib/remote_table/executor.rb +29 -0
- data/lib/remote_table/format/delimited.rb +62 -0
- data/lib/remote_table/format/excel.rb +10 -0
- data/lib/remote_table/format/excelx.rb +10 -0
- data/lib/remote_table/format/fixed_width.rb +47 -0
- data/lib/remote_table/format/html.rb +43 -0
- data/lib/remote_table/format/mixins/rooable.rb +47 -0
- data/lib/remote_table/format/mixins/textual.rb +34 -0
- data/lib/remote_table/format/open_office.rb +10 -0
- data/lib/remote_table/format.rb +35 -0
- data/lib/remote_table/hasher.rb +25 -0
- data/lib/remote_table/local_file.rb +92 -0
- data/lib/remote_table/properties.rb +209 -0
- data/lib/remote_table/transformer.rb +17 -0
- data/lib/remote_table/version.rb +3 -0
- data/lib/remote_table.rb +91 -99
- data/remote_table.gemspec +32 -77
- data/test/{test_helper.rb → helper.rb} +9 -2
- data/test/test_big.rb +61 -0
- data/test/test_errata.rb +46 -0
- data/test/test_old_syntax.rb +229 -0
- data/test/test_old_transform.rb +49 -0
- data/test/test_remote_table.rb +13 -0
- metadata +176 -53
- data/VERSION +0 -1
- data/lib/remote_table/file/csv.rb +0 -49
- data/lib/remote_table/file/fixed_width.rb +0 -19
- data/lib/remote_table/file/html.rb +0 -37
- data/lib/remote_table/file/ods.rb +0 -11
- data/lib/remote_table/file/roo_spreadsheet.rb +0 -44
- data/lib/remote_table/file/xls.rb +0 -11
- data/lib/remote_table/file/xlsx.rb +0 -11
- data/lib/remote_table/file.rb +0 -100
- data/lib/remote_table/package.rb +0 -89
- data/lib/remote_table/request.rb +0 -44
- data/lib/remote_table/transform.rb +0 -58
- data/test/remote_table_test.rb +0 -386
data/test/test_big.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Integration tests that download large public data sets. They need network
# access and exercise zip extraction, format detection and form posting.
class TestBig < Test::Unit::TestCase
  should "open an XLS inside a zip file" do
    t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
    assert_equal 'ACURA',      t.rows.first['Manufacturer']
    assert_equal 'NSX',        t.rows.first['carline name']
    assert_equal 'VOLVO',      t.rows.last['Manufacturer']
    assert_equal 'V70 XC AWD', t.rows.last['carline name']
  end

  should "not have indifferent string/symbol hash access" do
    t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
    assert_equal 'ACURA', t.rows.first['Manufacturer']
    # Rows are keyed by String only, so the Symbol lookup must come back empty.
    assert_nil t.rows.first[:Manufacturer]
  end

  should "open a CSV inside a zip file" do
    t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv')
    assert_equal 'ACURA',             t.rows.first['Manufacturer']
    assert_equal 'NSX',               t.rows.first['carline name']
    assert_equal 'TOYOTA',            t.rows.last['Manufacturer']
    assert_equal 'RAV4 SOFT TOP 4WD', t.rows.last['carline name']
  end

  should "open a fixed-width file with an inline schema inside a zip file" do
    t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
                        :filename => 'Gd6-dsc.txt',
                        :format => :fixed_width,
                        :crop => 21..26, # inclusive
                        :cut => '2-',
                        :select => lambda { |row| /\A[A-Z]/.match row['code'] },
                        :schema => [[ 'code',   2, { :type => :string }  ],
                                    [ 'spacer', 2 ],
                                    [ 'name',   52, { :type => :string } ]])
    assert_equal 'regular grade gasoline (octane number of 87)', t.rows.first['name']
    assert_equal 'R',                                            t.rows.first['code']
    assert_equal 'electricity',                                  t.rows.last['name']
    assert_equal 'El',                                           t.rows.last['code']
  end

  should "send form data, follow redirects and use a filename glob" do
    url = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
    # The urlencoded POST body must stay one unbroken literal: a line break
    # inside it would be sent to the server as part of a field name.
    form_data = 'UserTableName=T_100_Segment__All_Carriers&DBShortName=Air_Carriers&RawDataTable=T_T100_SEGMENT_ALL_CARRIER&sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D1+AND+YEAR%3D2008&varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&grouplist=&suml=&sumRegion=&filter1=title%3D&filter2=title%3D&geo=All%A0&time=January&timename=Month&GEOGRAPHY=All&XYEAR=2008&FREQUENCY=1&AllVars=All&VarName=DEPARTURES_SCHEDULED&VarDesc=DepScheduled&VarType=Num&VarName=DEPARTURES_PERFORMED&VarDesc=DepPerformed&VarType=Num&VarName=PAYLOAD&VarDesc=Payload&VarType=Num&VarName=SEATS&VarDesc=Seats&VarType=Num&VarName=PASSENGERS&VarDesc=Passengers&VarType=Num&VarName=FREIGHT&VarDesc=Freight&VarType=Num&VarName=MAIL&VarDesc=Mail&VarType=Num&VarName=DISTANCE&VarDesc=Distance&VarType=Num&VarName=RAMP_TO_RAMP&VarDesc=RampToRamp&VarType=Num&VarName=AIR_TIME&VarDesc=AirTime&VarType=Num&VarName=UNIQUE_CARRIER&VarDesc=UniqueCarrier&VarType=Char&VarName=AIRLINE_ID&VarDesc=AirlineID&VarType=Num&VarName=UNIQUE_CARRIER_NAME&VarDesc=UniqueCarrierName&VarType=Char&VarName=UNIQUE_CARRIER_ENTITY&VarDesc=UniqCarrierEntity&VarType=Char&VarName=REGION&VarDesc=CarrierRegion&VarType=Char&VarName=CARRIER&VarDesc=Carrier&VarType=Char&VarName=CARRIER_NAME&VarDesc=CarrierName&VarType=Char&VarName=CARRIER_GROUP&VarDesc=CarrierGroup&VarType=Num&VarName=CARRIER_GROUP_NEW&VarDesc=CarrierGroupNew&VarType=Num&VarName=ORIGIN&VarDesc=Origin&VarType=Char&VarName=ORIGIN_CITY_NAME&VarDesc=OriginCityName&VarType=Char&VarName=ORIGIN_CITY_NUM&VarDesc=OriginCityNum&VarType=Num&VarName=ORIGIN_STATE_ABR&VarDesc=OriginState&VarType=Char&VarName=ORIGIN_STATE_FIPS&VarDesc=OriginStateFips&VarType=Char&VarName=ORIGIN_STATE_NM&VarDesc=OriginStateName&VarType=Char&VarName=ORIGIN_COUNTRY&VarDesc=OriginCountry&VarType=Char&VarName=ORIGIN_COUNTRY_NAME&VarDesc=OriginCountryName&VarType=Char&VarName=ORIGIN_WAC&VarDesc=OriginWac&VarType=Num&VarName=DEST&VarDesc=Dest&VarType=Char&VarName=DEST_CITY_NAME&VarDesc=DestCityName&VarType=Char&VarName=DEST_CITY_NUM&VarDesc=DestCityNum&VarType=Num&VarName=DEST_STATE_ABR&VarDesc=DestState&VarType=Char&VarName=DEST_STATE_FIPS&VarDesc=DestStateFips&VarType=Char&VarName=DEST_STATE_NM&VarDesc=DestStateName&VarType=Char&VarName=DEST_COUNTRY&VarDesc=DestCountry&VarType=Char&VarName=DEST_COUNTRY_NAME&VarDesc=DestCountryName&VarType=Char&VarName=DEST_WAC&VarDesc=DestWac&VarType=Num&VarName=AIRCRAFT_GROUP&VarDesc=AircraftGroup&VarType=Num&VarName=AIRCRAFT_TYPE&VarDesc=AircraftType&VarType=Char&VarName=AIRCRAFT_CONFIG&VarDesc=AircraftConfig&VarType=Num&VarName=YEAR&VarDesc=Year&VarType=Num&VarName=QUARTER&VarDesc=Quarter&VarType=Num&VarName=MONTH&VarDesc=Month&VarType=Num&VarName=DISTANCE_GROUP&VarDesc=DistanceGroup&VarType=Num&VarName=CLASS&VarDesc=Class&VarType=Char&VarName=DATA_SOURCE&VarDesc=DataSource&VarType=Char'
    t = RemoteTable.new :url => url, :form_data => form_data, :compression => :zip, :glob => '/*.csv'
    assert_equal 'United States of America', t.rows.first['DEST_COUNTRY_NAME']
  end

  # should "provide a row_hash on demand" do
  #   t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
  #                       :filename => 'Gd6-dsc.txt',
  #                       :format => :fixed_width,
  #                       :crop => 21..26, # inclusive
  #                       :cut => '2-',
  #                       :select => lambda { |row| /\A[A-Z]/.match row['code'] },
  #                       :schema => [[ 'code',   2, { :type => :string }  ],
  #                                   [ 'spacer', 2 ],
  #                                   [ 'name',   52, { :type => :string } ]])
  #   assert_equal 'a8a5d7f17b56772723c657eb62b0f238', t.rows.first['row_hash']
  # end
end
|
data/test/test_errata.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'errata'
|
3
|
+
|
4
|
+
# Responder object handed to Errata in the errata test. Every predicate takes
# a table row (a Hash keyed by column name) and answers a yes/no question
# about it.
class AircraftGuru
  # Names of the dynamic predicates handled by #method_missing, for example
  # +is_attributed_to_boeing?+ or +is_not_attributed_to_mcdonnell_douglas?+.
  # Capture 1 is the manufacturer part of the method name.
  ATTRIBUTION_PATTERN = /\Ais_n?o?t?_?attributed_to_([^\?]+)/

  # Truthy when the row's designator starts with "DC" followed by a digit.
  def is_a_dc_plane?(row)
    row['Designator'] =~ /^DC\d/i
  end

  # def is_a_crj_900?(row)
  #   row['Designator'].downcase == 'crj9'
  # end

  # Truthy when the row's designator is exactly "G159".
  def is_a_g159?(row)
    row['Designator'] =~ /^G159$/
  end

  # Truthy when the row's designator is exactly "GALX".
  def is_a_galx?(row)
    row['Designator'] =~ /^GALX$/
  end

  # Answers +is_attributed_to_<name>?(row)+ and
  # +is_not_attributed_to_<name>?(row)+ by matching <name> (underscores become
  # optional spaces, case-insensitive) against row['Manufacturer'].
  # Any other unknown method is passed on to the default handler, which
  # raises NoMethodError.
  def method_missing(method_id, *args, &block)
    method_name = method_id.to_s
    attribution = ATTRIBUTION_PATTERN.match(method_name)
    return super unless attribution

    manufacturer_regexp = Regexp.new(attribution[1].gsub('_', ' ?'), Regexp::IGNORECASE)
    matches = manufacturer_regexp.match(args.first['Manufacturer']) # row['Manufacturer'] =~ /mcdonnell douglas/i
    method_name.include?('not_attributed') ? matches.nil? : !matches.nil?
  end

  # Keeps respond_to? and method() truthful about the predicates that
  # #method_missing answers.
  def respond_to_missing?(method_id, include_private = false)
    !ATTRIBUTION_PATTERN.match(method_id.to_s).nil? || super
  end
end
|
32
|
+
|
33
|
+
# Checks that corrections from an errata table are applied to rows scraped
# from an HTML page. Needs network access.
class TestErrata < Test::Unit::TestCase
  should "be able to apply errata files" do
    # The corrections live in a published spreadsheet; AircraftGuru answers
    # the conditions that the errata rows refer to.
    corrections = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw')
    errata = Errata.new(:table => corrections, :responder => AircraftGuru.new)

    table = RemoteTable.new(
      :url          => 'http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm',
      :encoding     => 'windows-1252',
      :row_xpath    => '//table/tr[2]/td/table/tr',
      :column_xpath => 'td',
      :errata       => errata
    )

    gulfstream_one = table.rows.find { |row| row['Model'] =~ /Gulfstream I/ }
    assert gulfstream_one
    assert_equal 'GULFSTREAM AEROSPACE', gulfstream_one['Manufacturer']
    assert_equal 'Gulfstream I', gulfstream_one['Model']
  end
end
|
@@ -0,0 +1,229 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Expected contents of the test2.* fixture files when blank rows are kept.
$test2_rows_with_blanks = [
  { 'header4' => '', 'header5' => '', 'header6' => '' },
  { 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
  { 'header4' => '', 'header5' => '', 'header6' => '' },
  { 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
]
# Expected contents of the same fixture files when blank rows are dropped.
$test2_rows = [
  { 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
  { 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
]
# Freeze the row hashes and their values as well as the arrays: the fixtures
# are shared by several tests, and freezing only the outer array would still
# let one test change a row that another test compares against.
[$test2_rows_with_blanks, $test2_rows].each do |fixture|
  fixture.each do |row|
    row.each_value { |value| value.freeze }
    row.freeze
  end
  fixture.freeze
end
|
15
|
+
|
16
|
+
# Regression tests for the pre-1.0 option syntax (:url => ..., t.rows).
# Every test downloads its fixture, so network access is required.
class TestOldSyntax < Test::Unit::TestCase
  should "open an XLSX like an array (numbered columns)" do
    t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => false)
    assert_equal "Secure encryption of all data", t.rows[5][0]
  end

  should "open an XLSX with custom headers" do
    t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => %w{foo bar baz})
    assert_equal "Secure encryption of all data", t.rows[5]['foo']
  end

  should "open an XLSX" do
    t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
    assert_equal "Secure encryption of all data", t.rows[5]["Requirements"]
  end

  should "work on filenames with spaces, using globbing" do
    t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
    assert_equal 'ASTON MARTIN', t.rows.first['MFR']
  end

  should "work on filenames with spaces" do
    t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
    assert_equal 'ASTON MARTIN', t.rows.first['MFR']
  end

  should "ignore UTF-8 byte order marks" do
    t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
    assert_equal 'Tawleed', t.rows.first['name']
  end

  # this will die with an error about libcurl if your curl doesn't support ssl
  should "connect using HTTPS if available" do
    t = RemoteTable.new(:url => 'https://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
    assert_equal 'Gulf Coast', t.rows.first['PAD district name']
    assert_equal 'AL', t.rows.first['State']
    assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
    assert_equal 'WY', t.rows.last['State']
  end

  should "read an HTML table made with frontpage" do
    t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-E.htm",
                        :encoding => 'US-ASCII',
                        :row_xpath => '//table/tr[2]/td/table/tr',
                        :column_xpath => 'td'
    assert_equal 'E110', t.rows.first['Designator']
    assert_equal 'EMBRAER', t.rows.first['Manufacturer']
    assert_equal 'EZKC', t.rows.last['Designator']
    assert_equal 'EZ King Cobra', t.rows.last['Model']
  end

  # NOTE(review): confirm RemoteTable::Transform.row_hash still exists in
  # 1.0.0 - this release adds lib/remote_table/hasher.rb and removes
  # lib/remote_table/transform.rb.
  should "hash rows without paying attention to order" do
    x = ActiveSupport::OrderedHash.new
    x[:a] = 1
    x[:b] = 2

    y = ActiveSupport::OrderedHash.new
    y[:b] = 2
    y[:a] = 1

    # The two hashes really are ordered differently...
    assert_not_equal Marshal.dump(x), Marshal.dump(y)
    # ...yet they must produce the same row hash.
    assert_equal RemoteTable::Transform.row_hash(x), RemoteTable::Transform.row_hash(y)
  end

  should "open a Google Docs url (as a CSV)" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
    assert_equal 'Gulf Coast', t.rows.first['PAD district name']
    assert_equal 'AL', t.rows.first['State']
    assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
    assert_equal 'WY', t.rows.last['State']
  end

  should "open a Google Docs url (as a CSV, with sheet options)" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA&single=true&gid=0')
    assert_equal 'Gulf Coast', t.rows.first['PAD district name']
    assert_equal 'AL', t.rows.first['State']
    assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
    assert_equal 'WY', t.rows.last['State']
  end

  should "open a Google Docs url as a CSV without headers" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
    assert_equal 'AL', t.rows.first[0]
    assert_equal 'Gulf Coast', t.rows.first[4]
    assert_equal 'WY', t.rows.last[0]
    assert_equal 'Rocky Mountain', t.rows.last[4]
  end

  should "take the last of values if the header is duplicated" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg')
    assert_equal '2', t.rows.first['dup_header']
  end

  should "respect field order in CSVs without headers" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
    t.rows.each do |row|
      last_column_number = -1
      row.each do |column_number, v|
        next if column_number == 'row_hash'
        assert column_number.is_a?(Numeric)
        assert(column_number > last_column_number)
        last_column_number = column_number
      end
    end
  end

  # One pair of tests per spreadsheet format. The blocks are plain closures
  # over +format+, so no source string has to be built and eval'ed.
  %w{ csv ods xls }.each do |format|
    test2_url = "http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}"

    should "read #{format}" do
      t = RemoteTable.new(:url => test2_url)
      # no blank headers
      assert t.rows.all? { |row| row.keys.all?(&:present?) }
      # correct values
      t.rows.each_with_index do |row, index|
        assert_equal $test2_rows[index], row.except('row_hash')
      end
    end

    should "read #{format}, keeping blank rows" do
      t = RemoteTable.new(:url => test2_url, :keep_blank_rows => true)
      # no blank headers
      assert t.rows.all? { |row| row.keys.all?(&:present?) }
      # correct values
      t.rows.each_with_index do |row, index|
        assert_equal $test2_rows_with_blanks[index], row.except('row_hash')
      end
    end
  end

  should "read fixed width correctly" do
    t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
                        :format => :fixed_width,
                        :skip => 1,
                        :schema => [[ 'header4', 10, { :type => :string }  ],
                                    [ 'spacer',  1 ],
                                    [ 'header5', 10, { :type => :string } ],
                                    [ 'spacer',  12 ],
                                    [ 'header6', 10, { :type => :string } ]])

    # no blank headers
    assert t.rows.all? { |row| row.keys.all?(&:present?) }
    # correct values
    t.rows.each_with_index do |row, index|
      assert_equal $test2_rows[index], row.except('row_hash')
    end
  end

  should "read fixed width correctly, keeping blank rows" do
    t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
                        :format => :fixed_width,
                        :keep_blank_rows => true,
                        :skip => 1,
                        :schema => [[ 'header4', 10, { :type => :string }  ],
                                    [ 'spacer',  1 ],
                                    [ 'header5', 10, { :type => :string } ],
                                    [ 'spacer',  12 ],
                                    [ 'header6', 10, { :type => :string } ]])

    # no blank headers
    assert t.rows.all? { |row| row.keys.all?(&:present?) }
    # correct values
    t.rows.each_with_index do |row, index|
      assert_equal $test2_rows_with_blanks[index], row.except('row_hash')
    end
  end

  should "have the same row hash across formats" do
    csv = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv')
    ods = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods')
    xls = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls')
    fixed_width = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
                                  :format => :fixed_width,
                                  :skip => 1,
                                  :schema => [[ 'header1', 10, { :type => :string }  ],
                                              [ 'spacer',  1 ],
                                              [ 'header2', 10, { :type => :string } ],
                                              [ 'spacer',  12 ],
                                              [ 'header3', 10, { :type => :string } ]])

    csv2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv')
    ods2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods')
    xls2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls')
    fixed_width2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
                                   :format => :fixed_width,
                                   :skip => 1,
                                   :schema => [[ 'spacer',  11 ],
                                               [ 'header2', 10, { :type => :string } ],
                                               [ 'spacer',  1 ],
                                               [ 'header3', 10, { :type => :string } ],
                                               [ 'spacer',  1 ],
                                               [ 'header1', 10, { :type => :string } ]])


    reference = csv.rows[0]['row_hash']

    # same row hashes
    assert_equal reference, ods.rows[0]['row_hash']
    assert_equal reference, xls.rows[0]['row_hash']
    assert_equal reference, fixed_width.rows[0]['row_hash']
    # same row hashes with different order
    assert_equal reference, csv2.rows[0]['row_hash']
    assert_equal reference, ods2.rows[0]['row_hash']
    assert_equal reference, xls2.rows[0]['row_hash']
    assert_equal reference, fixed_width2.rows[0]['row_hash']
  end

  should "open an ODS" do
    t = RemoteTable.new(:url => 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true)

    assert_equal 'Central Africa', t.rows[5]['name']
    assert_equal 99, t.rows[5]['MAP DATA population (millions) 2002'].to_i
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Old-style transform used by the transform test. It turns one wide
# spreadsheet row (a 'Date' column plus one "<place> Residual Fuel Oil"
# column per location) into one small hash per location.
class FuelOilParser
  # Column names carrying a price; capture 1 is the location part.
  COLUMN_PATTERN = /(.+) Residual Fuel Oil/
  # Location parts naming a district, e.g. "East Coast (PADD 1)".
  PADD_PATTERN = /\(PADD (.*)\)/

  def initialize(options = {})
    # nothing
  end

  # Writes the hints this parser needs into +bus+ (anything that supports
  # []=): which sheet to read, how many leading rows to skip and which
  # transformed rows to keep.
  def add_hints!(bus)
    bus[:sheet] = 'Data 1'
    bus[:skip] = 2
    bus[:select] = lambda { |row| row['year'] > 1989 }
  end

  # Returns an Array with one Hash ('locatable', 'cost', 'year', 'month') for
  # every price column of +row+ that has a value. The result is empty when
  # the row has no date. 'Date' must be a string Time.parse understands,
  # otherwise Time.parse raises.
  def apply(row)
    date = row['Date']
    return [] if date.blank?

    parsed_date = nil
    virtual_rows = []
    row.keys.grep(COLUMN_PATTERN).each do |location_column_name|
      cost = row[location_column_name]
      next if cost.blank?
      locatable = locatable_for(location_column_name.to_s[COLUMN_PATTERN, 1])
      next if locatable.nil?
      # Parse lazily so a row without any usable price column never touches
      # the date.
      parsed_date ||= Time.parse(date)
      virtual_rows << {
        'locatable' => locatable,
        'cost' => cost,
        'year' => parsed_date.year,
        'month' => parsed_date.month
      }
    end
    virtual_rows
  end

  private

  # Maps the location part of a column name to a "name (Kind)" label, or nil
  # when the column must be skipped.
  def locatable_for(first_part)
    if first_part.start_with?('U.S.')
      "united_states (Country)"
    elsif first_part.include?('PADD')
      padd_part = first_part[PADD_PATTERN, 1]
      # No parenthesised district means there is nothing to label the row with.
      return nil if padd_part.nil?
      return nil if padd_part == '1' # skip PADD 1 because we always prefer subdistricts
      "#{padd_part} (PetroleumAdministrationForDefenseDistrict)"
    else
      "#{first_part} (State)"
    end
  end
end
|
38
|
+
|
39
|
+
# Exercises the old :transform => { :class => ... } option against a live
# spreadsheet. Needs network access.
class TestOldTransform < Test::Unit::TestCase
  should "open an XLS with a parser" do
    table = RemoteTable.new(
      :url       => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
      :transform => { :class => FuelOilParser }
    )

    # January 1990 prices that the parser is expected to produce.
    massachusetts = { "locatable" => "Massachusetts (State)", "cost" => "54.0", "year" => 1990, "month" => 1 }
    georgia       = { "locatable" => "Georgia (State)",       "cost" => "50.7", "year" => 1990, "month" => 1 }

    [massachusetts, georgia].each do |expected_row|
      assert table.rows.include?(expected_row)
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests for the 1.0 syntax: the URL may be passed as a bare string and the
# table is indexed directly instead of through #rows. Needs network access.
class TestRemoteTable < Test::Unit::TestCase
  should "open an XLSX" do
    table = RemoteTable.new('www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
    sixth_row = table[5]
    assert_equal 'Secure encryption of all data', sixth_row['Requirements']
  end

  should "add a row hash to every row" do
    table = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
    sixth_row = table[5]
    assert_equal '59d68cfc1cd6b32f5b333d6f0e4bea6d', sixth_row['row_hash']
  end
end
|