remote_table 0.2.32 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +5 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +65 -0
- data/LICENSE +1 -1
- data/README.rdoc +21 -7
- data/Rakefile +12 -61
- data/lib/remote_table/cleaner.rb +19 -0
- data/lib/remote_table/executor.rb +29 -0
- data/lib/remote_table/format/delimited.rb +62 -0
- data/lib/remote_table/format/excel.rb +10 -0
- data/lib/remote_table/format/excelx.rb +10 -0
- data/lib/remote_table/format/fixed_width.rb +47 -0
- data/lib/remote_table/format/html.rb +43 -0
- data/lib/remote_table/format/mixins/rooable.rb +47 -0
- data/lib/remote_table/format/mixins/textual.rb +34 -0
- data/lib/remote_table/format/open_office.rb +10 -0
- data/lib/remote_table/format.rb +35 -0
- data/lib/remote_table/hasher.rb +25 -0
- data/lib/remote_table/local_file.rb +92 -0
- data/lib/remote_table/properties.rb +209 -0
- data/lib/remote_table/transformer.rb +17 -0
- data/lib/remote_table/version.rb +3 -0
- data/lib/remote_table.rb +91 -99
- data/remote_table.gemspec +32 -77
- data/test/{test_helper.rb → helper.rb} +9 -2
- data/test/test_big.rb +61 -0
- data/test/test_errata.rb +46 -0
- data/test/test_old_syntax.rb +229 -0
- data/test/test_old_transform.rb +49 -0
- data/test/test_remote_table.rb +13 -0
- metadata +176 -53
- data/VERSION +0 -1
- data/lib/remote_table/file/csv.rb +0 -49
- data/lib/remote_table/file/fixed_width.rb +0 -19
- data/lib/remote_table/file/html.rb +0 -37
- data/lib/remote_table/file/ods.rb +0 -11
- data/lib/remote_table/file/roo_spreadsheet.rb +0 -44
- data/lib/remote_table/file/xls.rb +0 -11
- data/lib/remote_table/file/xlsx.rb +0 -11
- data/lib/remote_table/file.rb +0 -100
- data/lib/remote_table/package.rb +0 -89
- data/lib/remote_table/request.rb +0 -44
- data/lib/remote_table/transform.rb +0 -58
- data/test/remote_table_test.rb +0 -386
data/test/test_big.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests that download comparatively large remote files (zip archives and a
# form-driven download) and check the first/last parsed rows.
# All of them need network access to the hosts named in the URLs.
class TestBig < Test::Unit::TestCase
  should "open an XLS inside a zip file" do
    t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
    assert_equal 'ACURA', t.rows.first['Manufacturer']
    assert_equal 'NSX', t.rows.first['carline name']
    assert_equal 'VOLVO', t.rows.last['Manufacturer']
    assert_equal 'V70 XC AWD', t.rows.last['carline name']
  end

  should "not have indifferent string/symbol hash access" do
    t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
    assert_equal 'ACURA', t.rows.first['Manufacturer']
    # Row keys are strings, so the symbol form of the same header must miss.
    assert_nil t.rows.first[:Manufacturer]
  end

  should "open a CSV inside a zip file" do
    t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv')
    assert_equal 'ACURA', t.rows.first['Manufacturer']
    assert_equal 'NSX', t.rows.first['carline name']
    assert_equal 'TOYOTA', t.rows.last['Manufacturer']
    assert_equal 'RAV4 SOFT TOP 4WD', t.rows.last['carline name']
  end

  should "open a fixed-width file with an inline schema inside a zip file" do
    t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
                        :filename => 'Gd6-dsc.txt',
                        :format => :fixed_width,
                        :crop => 21..26, # inclusive
                        :cut => '2-',
                        :select => lambda { |row| /\A[A-Z]/.match row['code'] },
                        :schema => [[ 'code', 2, { :type => :string } ],
                                    [ 'spacer', 2 ],
                                    [ 'name', 52, { :type => :string } ]])
    assert_equal 'regular grade gasoline (octane number of 87)', t.rows.first['name']
    assert_equal 'R', t.rows.first['code']
    assert_equal 'electricity', t.rows.last['name']
    assert_equal 'El', t.rows.last['code']
  end

  should "send form data, follow redirects and use a filename glob" do
    url = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
    # Pre-encoded form body; it must stay a single unbroken string.
    form_data = 'UserTableName=T_100_Segment__All_Carriers&DBShortName=Air_Carriers&RawDataTable=T_T100_SEGMENT_ALL_CARRIER&sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D1+AND+YEAR%3D2008&varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&grouplist=&suml=&sumRegion=&filter1=title%3D&filter2=title%3D&geo=All%A0&time=January&timename=Month&GEOGRAPHY=All&XYEAR=2008&FREQUENCY=1&AllVars=All&VarName=DEPARTURES_SCHEDULED&VarDesc=DepScheduled&VarType=Num&VarName=DEPARTURES_PERFORMED&VarDesc=DepPerformed&VarType=Num&VarName=PAYLOAD&VarDesc=Payload&VarType=Num&VarName=SEATS&VarDesc=Seats&VarType=Num&VarName=PASSENGERS&VarDesc=Passengers&VarType=Num&VarName=FREIGHT&VarDesc=Freight&VarType=Num&VarName=MAIL&VarDesc=Mail&VarType=Num&VarName=DISTANCE&VarDesc=Distance&VarType=Num&VarName=RAMP_TO_RAMP&VarDesc=RampToRamp&VarType=Num&VarName=AIR_TIME&VarDesc=AirTime&VarType=Num&VarName=UNIQUE_CARRIER&VarDesc=UniqueCarrier&VarType=Char&VarName=AIRLINE_ID&VarDesc=AirlineID&VarType=Num&VarName=UNIQUE_CARRIER_NAME&VarDesc=UniqueCarrierName&VarType=Char&VarName=UNIQUE_CARRIER_ENTITY&VarDesc=UniqCarrierEntity&VarType=Char&VarName=REGION&VarDesc=CarrierRegion&VarType=Char&VarName=CARRIER&VarDesc=Carrier&VarType=Char&VarName=CARRIER_NAME&VarDesc=CarrierName&VarType=Char&VarName=CARRIER_GROUP&VarDesc=CarrierGroup&VarType=Num&VarName=CARRIER_GROUP_NEW&VarDesc=CarrierGroupNew&VarType=Num&VarName=ORIGIN&VarDesc=Origin&VarType=Char&VarName=ORIGIN_CITY_NAME&VarDesc=OriginCityName&VarType=Char&VarName=ORIGIN_CITY_NUM&VarDesc=OriginCityNum&VarType=Num&VarName=ORIGIN_STATE_ABR&VarDesc=OriginState&VarType=Char&VarName=ORIGIN_STATE_FIPS&VarDesc=OriginStateFips&VarType=Char&VarName=ORIGIN_STATE_NM&VarDesc=OriginStateName&VarType=Char&VarName=ORIGIN_COUNTRY&VarDesc=OriginCountry&VarType=Char&VarName=ORIGIN_COUNTRY_NAME&VarDesc=OriginCountryName&VarType=Char&VarName=ORIGIN_WAC&VarDesc=OriginWac&VarType=Num&VarName=DEST&VarDesc=Dest&VarType=Char&VarName=DEST_CITY_NAME&VarDesc=DestCityName&VarType=Char&VarName=DEST_CITY_NUM&VarDesc=DestCityNum&VarType=Num&VarName=DEST_STATE_ABR&VarDesc=DestState&VarType=Char&VarName=DEST_STATE_FIPS&VarDesc=DestStateFips&VarType=Char&VarName=DEST_STATE_NM&VarDesc=DestStateName&VarType=Char&VarName=DEST_COUNTRY&VarDesc=DestCountry&VarType=Char&VarName=DEST_COUNTRY_NAME&VarDesc=DestCountryName&VarType=Char&VarName=DEST_WAC&VarDesc=DestWac&VarType=Num&VarName=AIRCRAFT_GROUP&VarDesc=AircraftGroup&VarType=Num&VarName=AIRCRAFT_TYPE&VarDesc=AircraftType&VarType=Char&VarName=AIRCRAFT_CONFIG&VarDesc=AircraftConfig&VarType=Num&VarName=YEAR&VarDesc=Year&VarType=Num&VarName=QUARTER&VarDesc=Quarter&VarType=Num&VarName=MONTH&VarDesc=Month&VarType=Num&VarName=DISTANCE_GROUP&VarDesc=DistanceGroup&VarType=Num&VarName=CLASS&VarDesc=Class&VarType=Char&VarName=DATA_SOURCE&VarDesc=DataSource&VarType=Char'
    t = RemoteTable.new :url => url, :form_data => form_data, :compression => :zip, :glob => '/*.csv'
    assert_equal 'United States of America', t.rows.first['DEST_COUNTRY_NAME']
  end

  # should "provide a row_hash on demand" do
  #   t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
  #                       :filename => 'Gd6-dsc.txt',
  #                       :format => :fixed_width,
  #                       :crop => 21..26, # inclusive
  #                       :cut => '2-',
  #                       :select => lambda { |row| /\A[A-Z]/.match row['code'] },
  #                       :schema => [[ 'code', 2, { :type => :string } ],
  #                                   [ 'spacer', 2 ],
  #                                   [ 'name', 52, { :type => :string } ]])
  #   assert_equal 'a8a5d7f17b56772723c657eb62b0f238', t.rows.first['row_hash']
  # end
end
|
data/test/test_errata.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'errata'
|
3
|
+
|
4
|
+
# Responder object handed to Errata.new (as :responder) in TestErrata.
# Each predicate receives a row hash and returns a truthy/falsy answer.
#
# Besides the explicit is_a_*? predicates it answers dynamic predicates of
# the form is_attributed_to_<name>? / is_not_attributed_to_<name>?, where
# <name> is matched case-insensitively against row['Manufacturer'] and each
# underscore in <name> stands for an optional space.
class AircraftGuru
  # Method-name pattern handled dynamically; capture 1 is the manufacturer
  # name as spelled in the method name. Shared by method_missing and
  # respond_to_missing? so the two can never disagree.
  ATTRIBUTION_PREDICATE = /\Ais_n?o?t?_?attributed_to_([^\?]+)/

  # Truthy (match offset) when the designator starts with "DC" plus a digit.
  def is_a_dc_plane?(row)
    row['Designator'] =~ /^DC\d/i
  end

  # def is_a_crj_900?(row)
  #   row['Designator'].downcase == 'crj9'
  # end

  # Truthy when the designator is exactly G159.
  def is_a_g159?(row)
    row['Designator'] =~ /^G159$/
  end

  # Truthy when the designator is exactly GALX.
  def is_a_galx?(row)
    row['Designator'] =~ /^GALX$/
  end

  # Answers the dynamic attribution predicates described on the class.
  #
  # The first argument must be the row hash. Returns true/false; any other
  # method name falls through to the default NoMethodError.
  def method_missing(method_id, *args, &block)
    captured = ATTRIBUTION_PREDICATE.match(method_id.to_s)
    return super unless captured

    # "mcdonnell_douglas" => /mcdonnell ?douglas/i
    manufacturer_regexp = Regexp.new(captured[1].gsub('_', ' ?'), Regexp::IGNORECASE)
    matches = manufacturer_regexp.match(args.first['Manufacturer']) # row['Manufacturer'] =~ /mcdonnell douglas/i
    method_id.to_s.include?('not_attributed') ? matches.nil? : !matches.nil?
  end

  # Keeps respond_to? (and Object#method) truthful for the predicates that
  # method_missing answers.
  def respond_to_missing?(method_id, include_private = false)
    !ATTRIBUTION_PREDICATE.match(method_id.to_s).nil? || super
  end
end
|
32
|
+
|
33
|
+
# Checks that an Errata table (with AircraftGuru as its responder) is
# applied to rows read from a remote HTML table. Requires network access.
class TestErrata < Test::Unit::TestCase
  should "be able to apply errata files" do
    corrections = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw')
    errata = Errata.new(:table => corrections, :responder => AircraftGuru.new)

    table = RemoteTable.new(
      :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
      :encoding => 'windows-1252',
      :row_xpath => '//table/tr[2]/td/table/tr',
      :column_xpath => 'td',
      :errata => errata
    )

    # First row whose model mentions "Gulfstream I".
    gulfstream_row = table.rows.detect { |row| row['Model'] =~ /Gulfstream I/ }
    assert gulfstream_row
    assert_equal 'GULFSTREAM AEROSPACE', gulfstream_row['Manufacturer']
    assert_equal 'Gulfstream I', gulfstream_row['Model']
  end
end
|
@@ -0,0 +1,229 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Expected contents of the test2.* fixture files, used by TestOldSyntax.
# Both the arrays and every row hash are frozen so that one test cannot
# mutate the shared expectations seen by the others.

# Rows expected when blank rows are kept (:keep_blank_rows => true).
$test2_rows_with_blanks = [
  { 'header4' => '', 'header5' => '', 'header6' => '' },
  { 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
  { 'header4' => '', 'header5' => '', 'header6' => '' },
  { 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
].each { |row| row.freeze }.freeze

# Rows expected when blank rows are dropped (the default in these tests).
$test2_rows = [
  { 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
  { 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
].each { |row| row.freeze }.freeze
|
15
|
+
|
16
|
+
# Exercises the options-hash constructor (RemoteTable.new(:url => ...))
# against a range of remote fixtures: XLSX, zipped CSV, HTML, Google Docs,
# ODS/XLS and fixed-width text. Every test needs network access.
class TestOldSyntax < Test::Unit::TestCase
  should "open an XLSX like an array (numbered columns)" do
    t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => false)
    assert_equal "Secure encryption of all data", t.rows[5][0]
  end

  should "open an XLSX with custom headers" do
    t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => %w{foo bar baz})
    assert_equal "Secure encryption of all data", t.rows[5]['foo']
  end

  should "open an XLSX" do
    t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
    assert_equal "Secure encryption of all data", t.rows[5]["Requirements"]
  end

  should "work on filenames with spaces, using globbing" do
    t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
    assert_equal 'ASTON MARTIN', t.rows.first['MFR']
  end

  should "work on filenames with spaces" do
    t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
    assert_equal 'ASTON MARTIN', t.rows.first['MFR']
  end

  should "ignore UTF-8 byte order marks" do
    t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
    assert_equal 'Tawleed', t.rows.first['name']
  end

  # this will die with an error about libcurl if your curl doesn't support ssl
  should "connect using HTTPS if available" do
    t = RemoteTable.new(:url => 'https://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
    assert_equal 'Gulf Coast', t.rows.first['PAD district name']
    assert_equal 'AL', t.rows.first['State']
    assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
    assert_equal 'WY', t.rows.last['State']
  end

  should "read an HTML table made with frontpage" do
    t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-E.htm",
                        :encoding => 'US-ASCII',
                        :row_xpath => '//table/tr[2]/td/table/tr',
                        :column_xpath => 'td'
    assert_equal 'E110', t.rows.first['Designator']
    assert_equal 'EMBRAER', t.rows.first['Manufacturer']
    assert_equal 'EZKC', t.rows.last['Designator']
    assert_equal 'EZ King Cobra', t.rows.last['Model']
  end

  should "hash rows without paying attention to order" do
    x = ActiveSupport::OrderedHash.new
    x[:a] = 1
    x[:b] = 2

    y = ActiveSupport::OrderedHash.new
    y[:b] = 2
    y[:a] = 1

    # Same pairs, different insertion order: the serialized forms differ...
    assert_not_equal Marshal.dump(x), Marshal.dump(y)
    # ...but the row hash must not.
    # NOTE(review): confirm RemoteTable::Transform.row_hash is still defined
    # in the library version under test — TODO verify.
    assert_equal RemoteTable::Transform.row_hash(x), RemoteTable::Transform.row_hash(y)
  end

  should "open a Google Docs url (as a CSV)" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
    assert_equal 'Gulf Coast', t.rows.first['PAD district name']
    assert_equal 'AL', t.rows.first['State']
    assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
    assert_equal 'WY', t.rows.last['State']
  end

  should "open a Google Docs url (as a CSV, with sheet options)" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA&single=true&gid=0')
    assert_equal 'Gulf Coast', t.rows.first['PAD district name']
    assert_equal 'AL', t.rows.first['State']
    assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
    assert_equal 'WY', t.rows.last['State']
  end

  should "open a Google Docs url as a CSV without headers" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
    assert_equal 'AL', t.rows.first[0]
    assert_equal 'Gulf Coast', t.rows.first[4]
    assert_equal 'WY', t.rows.last[0]
    assert_equal 'Rocky Mountain', t.rows.last[4]
  end

  should "take the last of values if the header is duplicated" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg')
    assert_equal '2', t.rows.first['dup_header']
  end

  should "respect field order in CSVs without headers" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
    t.rows.each do |row|
      last_column_number = -1
      row.each do |column_number, v|
        next if column_number == 'row_hash'
        assert column_number.is_a?(Numeric)
        # Keys must come back in strictly increasing column order.
        assert(column_number > last_column_number)
        last_column_number = column_number
      end
    end
  end

  # One pair of tests per spreadsheet format. The blocks close over `format`,
  # so no string eval is needed to generate them.
  %w{ csv ods xls }.each do |format|
    should "read #{format}" do
      t = RemoteTable.new(:url => "http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}")
      # no blank headers
      assert t.rows.all? { |row| row.keys.all?(&:present?) }
      # correct values
      t.rows.each_with_index do |row, index|
        assert_equal $test2_rows[index], row.except('row_hash')
      end
    end

    should "read #{format}, keeping blank rows" do
      t = RemoteTable.new(:url => "http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}", :keep_blank_rows => true)
      # no blank headers
      assert t.rows.all? { |row| row.keys.all?(&:present?) }
      # correct values
      t.rows.each_with_index do |row, index|
        assert_equal $test2_rows_with_blanks[index], row.except('row_hash')
      end
    end
  end

  should "read fixed width correctly" do
    t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
                        :format => :fixed_width,
                        :skip => 1,
                        :schema => [[ 'header4', 10, { :type => :string } ],
                                    [ 'spacer',  1 ],
                                    [ 'header5', 10, { :type => :string } ],
                                    [ 'spacer',  12 ],
                                    [ 'header6', 10, { :type => :string } ]])

    # no blank headers
    assert t.rows.all? { |row| row.keys.all?(&:present?) }
    # correct values
    t.rows.each_with_index do |row, index|
      assert_equal $test2_rows[index], row.except('row_hash')
    end
  end

  should "read fixed width correctly, keeping blank rows" do
    t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
                        :format => :fixed_width,
                        :keep_blank_rows => true,
                        :skip => 1,
                        :schema => [[ 'header4', 10, { :type => :string } ],
                                    [ 'spacer',  1 ],
                                    [ 'header5', 10, { :type => :string } ],
                                    [ 'spacer',  12 ],
                                    [ 'header6', 10, { :type => :string } ]])

    # no blank headers
    assert t.rows.all? { |row| row.keys.all?(&:present?) }
    # correct values
    t.rows.each_with_index do |row, index|
      assert_equal $test2_rows_with_blanks[index], row.except('row_hash')
    end
  end

  should "have the same row hash across formats" do
    csv = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv')
    ods = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods')
    xls = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls')
    fixed_width = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
                                  :format => :fixed_width,
                                  :skip => 1,
                                  :schema => [[ 'header1', 10, { :type => :string } ],
                                              [ 'spacer',  1 ],
                                              [ 'header2', 10, { :type => :string } ],
                                              [ 'spacer',  12 ],
                                              [ 'header3', 10, { :type => :string } ]])

    csv2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv')
    ods2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods')
    xls2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls')
    fixed_width2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
                                   :format => :fixed_width,
                                   :skip => 1,
                                   :schema => [[ 'spacer',  11 ],
                                               [ 'header2', 10, { :type => :string } ],
                                               [ 'spacer',  1 ],
                                               [ 'header3', 10, { :type => :string } ],
                                               [ 'spacer',  1 ],
                                               [ 'header1', 10, { :type => :string } ]])


    reference = csv.rows[0]['row_hash']

    # same row hashes
    assert_equal reference, ods.rows[0]['row_hash']
    assert_equal reference, xls.rows[0]['row_hash']
    assert_equal reference, fixed_width.rows[0]['row_hash']
    # same row hashes with different order
    assert_equal reference, csv2.rows[0]['row_hash']
    assert_equal reference, ods2.rows[0]['row_hash']
    assert_equal reference, xls2.rows[0]['row_hash']
    assert_equal reference, fixed_width2.rows[0]['row_hash']
  end

  should "open an ODS" do
    t = RemoteTable.new(:url => 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true)

    assert_equal 'Central Africa', t.rows[5]['name']
    assert_equal 99, t.rows[5]['MAP DATA population (millions) 2002'].to_i
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Transformer class passed to RemoteTable via :transform => { :class => ... }
# in TestOldTransform. It turns one wide spreadsheet row (one price column
# per location) into several narrow rows, one per location.
#
# Relies on Object#blank? and Time.parse being loaded by the test helper.
class FuelOilParser
  # Column headers that carry a price; capture 1 is the location part.
  LOCATION_COLUMN = /(.+) Residual Fuel Oil/
  # Extracts the district identifier from e.g. "East Coast (PADD 1)".
  PADD_NAME = /\(PADD (.*)\)/

  # Accepts (and ignores) an options hash.
  def initialize(options = {})
    # nothing
  end

  # Writes the sheet name, the number of rows to skip and a row filter
  # (year later than 1989) into the given hash-like +bus+.
  def add_hints!(bus)
    bus[:sheet] = 'Data 1'
    bus[:skip] = 2
    bus[:select] = lambda { |row| row['year'] > 1989 }
  end

  # Expands +row+ into an array of hashes with the keys 'locatable', 'cost',
  # 'year' and 'month' — one per price column that has both a cost and a
  # 'Date' value.
  #
  # Captures are read explicitly from each string instead of through the
  # $1 global, so the result does not depend on which method last ran a
  # regular expression match.
  def apply(row)
    virtual_rows = []
    row.keys.grep(LOCATION_COLUMN).each do |location_column_name|
      first_part = location_column_name[LOCATION_COLUMN, 1]
      next if (cost = row[location_column_name]).blank? || (date = row['Date']).blank?
      if first_part.start_with?('U.S.')
        locatable = "united_states (Country)"
      elsif first_part.include?('PADD')
        padd_part = first_part[PADD_NAME, 1] # nil when the parenthesised form is absent
        next if padd_part == '1' # skip PADD 1 because we always prefer subdistricts
        locatable = "#{padd_part} (PetroleumAdministrationForDefenseDistrict)"
      else
        locatable = "#{first_part} (State)"
      end
      date = Time.parse(date)
      virtual_rows << {
        'locatable' => locatable,
        'cost' => cost,
        'year' => date.year,
        'month' => date.month
      }
    end
    virtual_rows
  end
end
|
38
|
+
|
39
|
+
# Checks that a :transform class (FuelOilParser) is applied to the rows of a
# remote XLS. Requires network access.
class TestOldTransform < Test::Unit::TestCase
  should "open an XLS with a parser" do
    # Two of the narrow rows the parser is expected to emit for January 1990.
    massachusetts = {"month"=>1, "cost"=>"54.0", "locatable"=>"Massachusetts (State)", "year"=>1990}
    georgia       = {"month"=>1, "cost"=>"50.7", "locatable"=>"Georgia (State)", "year"=>1990}

    table = RemoteTable.new(
      :url => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
      :transform => { :class => FuelOilParser }
    )

    assert table.rows.include?(massachusetts)
    assert table.rows.include?(georgia)
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests for the newer calling style: a bare URL argument and direct indexing
# of the table object. Requires network access.
class TestRemoteTable < Test::Unit::TestCase
  should "open an XLSX" do
    table = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
    sixth_row = table[5]
    assert_equal "Secure encryption of all data", sixth_row["Requirements"]
  end

  should "add a row hash to every row" do
    table = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
    sixth_row = table[5]
    assert_equal "59d68cfc1cd6b32f5b333d6f0e4bea6d", sixth_row['row_hash']
  end
end
|