remote_table 0.2.32 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/CHANGELOG +5 -0
  2. data/Gemfile +4 -0
  3. data/Gemfile.lock +65 -0
  4. data/LICENSE +1 -1
  5. data/README.rdoc +21 -7
  6. data/Rakefile +12 -61
  7. data/lib/remote_table/cleaner.rb +19 -0
  8. data/lib/remote_table/executor.rb +29 -0
  9. data/lib/remote_table/format/delimited.rb +62 -0
  10. data/lib/remote_table/format/excel.rb +10 -0
  11. data/lib/remote_table/format/excelx.rb +10 -0
  12. data/lib/remote_table/format/fixed_width.rb +47 -0
  13. data/lib/remote_table/format/html.rb +43 -0
  14. data/lib/remote_table/format/mixins/rooable.rb +47 -0
  15. data/lib/remote_table/format/mixins/textual.rb +34 -0
  16. data/lib/remote_table/format/open_office.rb +10 -0
  17. data/lib/remote_table/format.rb +35 -0
  18. data/lib/remote_table/hasher.rb +25 -0
  19. data/lib/remote_table/local_file.rb +92 -0
  20. data/lib/remote_table/properties.rb +209 -0
  21. data/lib/remote_table/transformer.rb +17 -0
  22. data/lib/remote_table/version.rb +3 -0
  23. data/lib/remote_table.rb +91 -99
  24. data/remote_table.gemspec +32 -77
  25. data/test/{test_helper.rb → helper.rb} +9 -2
  26. data/test/test_big.rb +61 -0
  27. data/test/test_errata.rb +46 -0
  28. data/test/test_old_syntax.rb +229 -0
  29. data/test/test_old_transform.rb +49 -0
  30. data/test/test_remote_table.rb +13 -0
  31. metadata +176 -53
  32. data/VERSION +0 -1
  33. data/lib/remote_table/file/csv.rb +0 -49
  34. data/lib/remote_table/file/fixed_width.rb +0 -19
  35. data/lib/remote_table/file/html.rb +0 -37
  36. data/lib/remote_table/file/ods.rb +0 -11
  37. data/lib/remote_table/file/roo_spreadsheet.rb +0 -44
  38. data/lib/remote_table/file/xls.rb +0 -11
  39. data/lib/remote_table/file/xlsx.rb +0 -11
  40. data/lib/remote_table/file.rb +0 -100
  41. data/lib/remote_table/package.rb +0 -89
  42. data/lib/remote_table/request.rb +0 -44
  43. data/lib/remote_table/transform.rb +0 -58
  44. data/test/remote_table_test.rb +0 -386
data/test/test_big.rb ADDED
@@ -0,0 +1,61 @@
1
+ require 'helper'
2
+
3
+ class TestBig < Test::Unit::TestCase
4
+ should "open an XLS inside a zip file" do
5
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
6
+ assert_equal 'ACURA', t.rows.first['Manufacturer']
7
+ assert_equal 'NSX', t.rows.first['carline name']
8
+ assert_equal 'VOLVO', t.rows.last['Manufacturer']
9
+ assert_equal 'V70 XC AWD', t.rows.last['carline name']
10
+ end
11
+
12
+ should "not have indifferent string/symbol hash access" do
13
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
14
+ assert_equal 'ACURA', t.rows.first['Manufacturer']
15
+ assert_equal nil, t.rows.first[:Manufacturer]
16
+ end
17
+
18
+ should "open a CSV inside a zip file" do
19
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv')
20
+ assert_equal 'ACURA', t.rows.first['Manufacturer']
21
+ assert_equal 'NSX', t.rows.first['carline name']
22
+ assert_equal 'TOYOTA', t.rows.last['Manufacturer']
23
+ assert_equal 'RAV4 SOFT TOP 4WD', t.rows.last['carline name']
24
+ end
25
+
26
+ should "open a fixed-width file with an inline schema inside a zip file" do
27
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
28
+ :filename => 'Gd6-dsc.txt',
29
+ :format => :fixed_width,
30
+ :crop => 21..26, # inclusive
31
+ :cut => '2-',
32
+ :select => lambda { |row| /\A[A-Z]/.match row['code'] },
33
+ :schema => [[ 'code', 2, { :type => :string } ],
34
+ [ 'spacer', 2 ],
35
+ [ 'name', 52, { :type => :string } ]])
36
+ assert_equal 'regular grade gasoline (octane number of 87)', t.rows.first['name']
37
+ assert_equal 'R', t.rows.first['code']
38
+ assert_equal 'electricity', t.rows.last['name']
39
+ assert_equal 'El', t.rows.last['code']
40
+ end
41
+
42
+ should "send form data, follow redirects and use a filename glob" do
43
+ url = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
44
+ form_data = 'UserTableName=T_100_Segment__All_Carriers&DBShortName=Air_Carriers&RawDataTable=T_T100_SEGMENT_ALL_CARRIER&sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D1+AND+YEAR%3D2008&varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&grouplist=&suml=&sumRegion=&filter1=title%3D&filter2=title%3D&geo=All%A0&time=January&timename=Month&GEOGRAPHY=All&XYEAR=2008&FREQUENCY=1&AllVars=All&VarName=DEPARTURES_SCHEDULED&VarDesc=DepScheduled&VarType=Num&VarName=DEPARTURES_PERFORMED&VarDesc=DepPerformed&VarType=Num&VarName=PAYLOAD&VarDesc=Payload&VarType=Num&VarName=SEATS&VarDesc=Seats&VarType=Num&VarName=PASSENGERS&VarDesc=Passengers&VarType=Num&VarName=FREIGHT&VarDesc=Freight&VarType=Num&VarName=MAIL&VarDesc=Mail&VarType=Num&VarName=DISTANCE&VarDesc=Distance&VarType=Num&VarName=RAMP_TO_RAMP&VarDesc=RampToRamp&VarType=Num&VarName=AIR_TIME&VarDesc=AirTime&VarType=Num&VarName=UNIQUE_CARRIER&VarDesc=UniqueCarrier&VarType=Char&VarName=AIRLINE_ID&VarDesc=AirlineID&VarType=Num&VarName=UNIQUE_CARRIER_NAME&VarDesc=UniqueCarrierName&VarType=Char&VarName=UNIQUE_CARRIER_ENTITY&VarDesc=UniqCarrierEntity&VarType=Char&VarName=REGION&VarDesc=CarrierRegion&VarType=Char&VarName=CARRIER&VarDesc=Carrier&VarType=Char&VarName=CARRIER_NAME&VarDesc=CarrierName&VarType=Char&VarName=CARRIER_GROUP&VarDesc=CarrierGroup&VarType=Num&VarName=CARRIER_GROUP_NEW&VarDesc=CarrierGroupNew&VarType=Num&VarName=ORIGIN&VarDesc=Origin&VarType=Char&VarName=ORIGIN_CITY_NAME&VarDesc=OriginCityName&VarType=Char&VarName=ORIGIN_CITY_NUM&VarDesc=OriginCityNum&VarType=Num&VarName=ORIGIN_STATE_ABR&VarDesc=OriginState&VarType=Char&VarName=ORIGIN_STATE_FIPS&VarDesc=OriginStateFips&VarType=Char&VarName=ORIGIN_STATE_NM&VarDesc=OriginStateName&VarType=Char&VarName=ORIGIN_COUNTRY&VarDesc=OriginCountry&VarType=Char&VarName=ORIGIN_COUNTRY_NAME&VarDesc=OriginCountryName&VarType=Char&VarName=ORIGIN_WAC&VarDesc=OriginWac&VarType=Num&VarName=DEST&VarDesc=Dest&VarType=Char&VarName=DEST_CITY_NAME&VarDesc=DestCityName&VarType=Char&VarName=DEST_CITY_NUM&VarDesc=DestCityNum&VarType=Num&VarName=DEST_STATE_ABR&VarDesc=DestState&VarType=Char&VarName=DEST_STATE_FIPS&VarDesc=DestStateFips&VarType=Char&VarName=DEST_STATE_NM&VarDesc=DestStateName&VarType=Char&VarName=DEST_COUNTRY&VarDesc=DestCountry&VarType=Char&VarName=DEST_COUNTRY_NAME&VarDesc=DestCountryName&VarType=Char&VarName=DEST_WAC&VarDesc=DestWac&VarType=Num&VarName=AIRCRAFT_GROUP&VarDesc=AircraftGroup&VarType=Num&VarName=AIRCRAFT_TYPE&VarDesc=AircraftType&VarType=Char&VarName=AIRCRAFT_CONFIG&VarDesc=AircraftConfig&VarType=Num&VarName=YEAR&VarDesc=Year&VarType=Num&VarName=QUARTER&VarDesc=Quarter&VarType=Num&VarName=MONTH&VarDesc=Month&VarType=Num&VarName=DISTANCE_GROUP&VarDesc=DistanceGroup&VarType=Num&VarName=CLASS&VarDesc=Class&VarType=Char&VarName=DATA_SOURCE&VarDesc=DataSource&VarType=Char'
45
+ t = RemoteTable.new :url => url, :form_data => form_data, :compression => :zip, :glob => '/*.csv'
46
+ assert_equal 'United States of America', t.rows.first['DEST_COUNTRY_NAME']
47
+ end
48
+
49
+ # should "provide a row_hash on demand" do
50
+ # t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
51
+ # :filename => 'Gd6-dsc.txt',
52
+ # :format => :fixed_width,
53
+ # :crop => 21..26, # inclusive
54
+ # :cut => '2-',
55
+ # :select => lambda { |row| /\A[A-Z]/.match row['code'] },
56
+ # :schema => [[ 'code', 2, { :type => :string } ],
57
+ # [ 'spacer', 2 ],
58
+ # [ 'name', 52, { :type => :string } ]])
59
+ # assert_equal 'a8a5d7f17b56772723c657eb62b0f238', t.rows.first['row_hash']
60
+ # end
61
+ end
data/test/test_errata.rb ADDED
@@ -0,0 +1,46 @@
1
+ require 'helper'
2
+ require 'errata'
3
+
4
+ class AircraftGuru
5
+ def is_a_dc_plane?(row)
6
+ row['Designator'] =~ /^DC\d/i
7
+ end
8
+
9
+ # def is_a_crj_900?(row)
10
+ # row['Designator'].downcase == 'crj9'
11
+ # end
12
+
13
+ def is_a_g159?(row)
14
+ row['Designator'] =~ /^G159$/
15
+ end
16
+
17
+ def is_a_galx?(row)
18
+ row['Designator'] =~ /^GALX$/
19
+ end
20
+
21
+ def method_missing(method_id, *args, &block)
22
+ if method_id.to_s =~ /\Ais_n?o?t?_?attributed_to_([^\?]+)/
23
+ manufacturer_name = $1
24
+ manufacturer_regexp = Regexp.new(manufacturer_name.gsub('_', ' ?'), Regexp::IGNORECASE)
25
+ matches = manufacturer_regexp.match(args.first['Manufacturer']) # row['Manufacturer'] =~ /mcdonnell douglas/i
26
+ method_id.to_s.include?('not_attributed') ? matches.nil? : !matches.nil?
27
+ else
28
+ super
29
+ end
30
+ end
31
+ end
32
+
33
+ class TestErrata < Test::Unit::TestCase
34
+ should "be able to apply errata files" do
35
+ t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
36
+ :encoding => 'windows-1252',
37
+ :row_xpath => '//table/tr[2]/td/table/tr',
38
+ :column_xpath => 'td',
39
+ :errata => Errata.new(:table => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'),
40
+ :responder => AircraftGuru.new)
41
+ g1 = t.rows.detect { |row| row['Model'] =~ /Gulfstream I/ }
42
+ assert g1
43
+ assert_equal 'GULFSTREAM AEROSPACE', g1['Manufacturer']
44
+ assert_equal 'Gulfstream I', g1['Model']
45
+ end
46
+ end
data/test/test_old_syntax.rb ADDED
@@ -0,0 +1,229 @@
1
+ require 'helper'
2
+
3
+ $test2_rows_with_blanks = [
4
+ { 'header4' => '', 'header5' => '', 'header6' => '' },
5
+ { 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
6
+ { 'header4' => '', 'header5' => '', 'header6' => '' },
7
+ { 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
8
+ ]
9
+ $test2_rows = [
10
+ { 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
11
+ { 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
12
+ ]
13
+ $test2_rows_with_blanks.freeze
14
+ $test2_rows.freeze
15
+
16
+ class TestOldSyntax < Test::Unit::TestCase
17
+ should "open an XLSX like an array (numbered columns)" do
18
+ t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => false)
19
+ assert_equal "Secure encryption of all data", t.rows[5][0]
20
+ end
21
+
22
+ should "open an XLSX with custom headers" do
23
+ t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => %w{foo bar baz})
24
+ assert_equal "Secure encryption of all data", t.rows[5]['foo']
25
+ end
26
+
27
+ should "open an XLSX" do
28
+ t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
29
+ assert_equal "Secure encryption of all data", t.rows[5]["Requirements"]
30
+ end
31
+
32
+ should "work on filenames with spaces, using globbing" do
33
+ t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
34
+ assert_equal 'ASTON MARTIN', t.rows.first['MFR']
35
+ end
36
+
37
+ should "work on filenames with spaces" do
38
+ t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
39
+ assert_equal 'ASTON MARTIN', t.rows.first['MFR']
40
+ end
41
+
42
+ should "ignore UTF-8 byte order marks" do
43
+ t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
44
+ assert_equal 'Tawleed', t.rows.first['name']
45
+ end
46
+
47
+ # this will die with an error about libcurl if your curl doesn't support ssl
48
+ should "connect using HTTPS if available" do
49
+ t = RemoteTable.new(:url => 'https://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
50
+ assert_equal 'Gulf Coast', t.rows.first['PAD district name']
51
+ assert_equal 'AL', t.rows.first['State']
52
+ assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
53
+ assert_equal 'WY', t.rows.last['State']
54
+ end
55
+
56
+ should "read an HTML table made with frontpage" do
57
+ t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-E.htm",
58
+ :encoding => 'US-ASCII',
59
+ :row_xpath => '//table/tr[2]/td/table/tr',
60
+ :column_xpath => 'td'
61
+ assert_equal 'E110', t.rows.first['Designator']
62
+ assert_equal 'EMBRAER', t.rows.first['Manufacturer']
63
+ assert_equal 'EZKC', t.rows.last['Designator']
64
+ assert_equal 'EZ King Cobra', t.rows.last['Model']
65
+ end
66
+
67
+ should "hash rows without paying attention to order" do
68
+ x = ActiveSupport::OrderedHash.new
69
+ x[:a] = 1
70
+ x[:b] = 2
71
+
72
+ y = ActiveSupport::OrderedHash.new
73
+ y[:b] = 2
74
+ y[:a] = 1
75
+
76
+ assert_not_equal Marshal.dump(x), Marshal.dump(y)
77
+ assert_equal RemoteTable::Transform.row_hash(x), RemoteTable::Transform.row_hash(y)
78
+ end
79
+
80
+ should "open a Google Docs url (as a CSV)" do
81
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
82
+ assert_equal 'Gulf Coast', t.rows.first['PAD district name']
83
+ assert_equal 'AL', t.rows.first['State']
84
+ assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
85
+ assert_equal 'WY', t.rows.last['State']
86
+ end
87
+
88
+ should "open a Google Docs url (as a CSV, with sheet options)" do
89
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA&single=true&gid=0')
90
+ assert_equal 'Gulf Coast', t.rows.first['PAD district name']
91
+ assert_equal 'AL', t.rows.first['State']
92
+ assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
93
+ assert_equal 'WY', t.rows.last['State']
94
+ end
95
+
96
+ should "open a Google Docs url as a CSV without headers" do
97
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
98
+ assert_equal 'AL', t.rows.first[0]
99
+ assert_equal 'Gulf Coast', t.rows.first[4]
100
+ assert_equal 'WY', t.rows.last[0]
101
+ assert_equal 'Rocky Mountain', t.rows.last[4]
102
+ end
103
+
104
+ should "take the last of values if the header is duplicated" do
105
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg')
106
+ assert_equal '2', t.rows.first['dup_header']
107
+ end
108
+
109
+ should "respect field order in CSVs without headers" do
110
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
111
+ t.rows.each do |row|
112
+ last_column_number = -1
113
+ row.each do |column_number, v|
114
+ next if column_number == 'row_hash'
115
+ assert column_number.is_a?(Numeric)
116
+ assert(column_number > last_column_number)
117
+ last_column_number = column_number
118
+ end
119
+ end
120
+ end
121
+
122
+ %w{ csv ods xls }.each do |format|
123
+ eval %{
124
+ should "read #{format}" do
125
+ t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}')
126
+ # no blank headers
127
+ assert t.rows.all? { |row| row.keys.all?(&:present?) }
128
+ # correct values
129
+ t.rows.each_with_index do |row, index|
130
+ assert_equal row.except('row_hash'), $test2_rows[index]
131
+ end
132
+ end
133
+
134
+ should "read #{format}, keeping blank rows" do
135
+ t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}', :keep_blank_rows => true)
136
+ # no blank headers
137
+ assert t.rows.all? { |row| row.keys.all?(&:present?) }
138
+ # correct values
139
+ t.rows.each_with_index do |row, index|
140
+ assert_equal row.except('row_hash'), $test2_rows_with_blanks[index]
141
+ end
142
+ end
143
+ }
144
+ end
145
+
146
+ should "read fixed width correctly" do
147
+ t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
148
+ :format => :fixed_width,
149
+ :skip => 1,
150
+ :schema => [[ 'header4', 10, { :type => :string } ],
151
+ [ 'spacer', 1 ],
152
+ [ 'header5', 10, { :type => :string } ],
153
+ [ 'spacer', 12 ],
154
+ [ 'header6', 10, { :type => :string } ]])
155
+
156
+ # no blank headers
157
+ assert t.rows.all? { |row| row.keys.all?(&:present?) }
158
+ # correct values
159
+ t.rows.each_with_index do |row, index|
160
+ assert_equal row.except('row_hash'), $test2_rows[index]
161
+ end
162
+ end
163
+
164
+ should "read fixed width correctly, keeping blank rows" do
165
+ t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
166
+ :format => :fixed_width,
167
+ :keep_blank_rows => true,
168
+ :skip => 1,
169
+ :schema => [[ 'header4', 10, { :type => :string } ],
170
+ [ 'spacer', 1 ],
171
+ [ 'header5', 10, { :type => :string } ],
172
+ [ 'spacer', 12 ],
173
+ [ 'header6', 10, { :type => :string } ]])
174
+
175
+ # no blank headers
176
+ assert t.rows.all? { |row| row.keys.all?(&:present?) }
177
+ # correct values
178
+ t.rows.each_with_index do |row, index|
179
+ assert_equal row.except('row_hash'), $test2_rows_with_blanks[index]
180
+ end
181
+ end
182
+
183
+ should "have the same row hash across formats" do
184
+ csv = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv')
185
+ ods = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods')
186
+ xls = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls')
187
+ fixed_width = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
188
+ :format => :fixed_width,
189
+ :skip => 1,
190
+ :schema => [[ 'header1', 10, { :type => :string } ],
191
+ [ 'spacer', 1 ],
192
+ [ 'header2', 10, { :type => :string } ],
193
+ [ 'spacer', 12 ],
194
+ [ 'header3', 10, { :type => :string } ]])
195
+
196
+ csv2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv')
197
+ ods2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods')
198
+ xls2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls')
199
+ fixed_width2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
200
+ :format => :fixed_width,
201
+ :skip => 1,
202
+ :schema => [[ 'spacer', 11 ],
203
+ [ 'header2', 10, { :type => :string } ],
204
+ [ 'spacer', 1 ],
205
+ [ 'header3', 10, { :type => :string } ],
206
+ [ 'spacer', 1 ],
207
+ [ 'header1', 10, { :type => :string } ]])
208
+
209
+
210
+ reference = csv.rows[0]['row_hash']
211
+
212
+ # same row hashes
213
+ assert_equal reference, ods.rows[0]['row_hash']
214
+ assert_equal reference, xls.rows[0]['row_hash']
215
+ assert_equal reference, fixed_width.rows[0]['row_hash']
216
+ # same row hashes with different order
217
+ assert_equal reference, csv2.rows[0]['row_hash']
218
+ assert_equal reference, ods2.rows[0]['row_hash']
219
+ assert_equal reference, xls2.rows[0]['row_hash']
220
+ assert_equal reference, fixed_width2.rows[0]['row_hash']
221
+ end
222
+
223
+ should "open an ODS" do
224
+ t = RemoteTable.new(:url => 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true)
225
+
226
+ assert_equal 'Central Africa', t.rows[5]['name']
227
+ assert_equal 99, t.rows[5]['MAP DATA population (millions) 2002'].to_i
228
+ end
229
+ end
data/test/test_old_transform.rb ADDED
@@ -0,0 +1,49 @@
1
+ require 'helper'
2
+
3
+ class FuelOilParser
4
+ def initialize(options = {})
5
+ # nothing
6
+ end
7
+ def add_hints!(bus)
8
+ bus[:sheet] = 'Data 1'
9
+ bus[:skip] = 2
10
+ bus[:select] = lambda { |row| row['year'] > 1989 }
11
+ end
12
+ def apply(row)
13
+ virtual_rows = []
14
+ row.keys.grep(/(.+) Residual Fuel Oil/) do |location_column_name|
15
+ first_part = $1
16
+ next if (cost = row[location_column_name]).blank? or (date = row['Date']).blank?
17
+ if first_part.start_with?('U.S.')
18
+ locatable = "united_states (Country)"
19
+ elsif first_part.include?('PADD')
20
+ /\(PADD (.*)\)/.match(first_part)
21
+ padd_part = $1
22
+ next if padd_part == '1' # skip PADD 1 because we always prefer subdistricts
23
+ locatable = "#{padd_part} (PetroleumAdministrationForDefenseDistrict)"
24
+ else
25
+ locatable = "#{first_part} (State)"
26
+ end
27
+ date = Time.parse(date)
28
+ virtual_rows << {
29
+ 'locatable' => locatable,
30
+ 'cost' => cost,
31
+ 'year' => date.year,
32
+ 'month' => date.month
33
+ }
34
+ end
35
+ virtual_rows
36
+ end
37
+ end
38
+
39
+ class TestOldTransform < Test::Unit::TestCase
40
+ should "open an XLS with a parser" do
41
+ ma_1990_01 = {"month"=>1, "cost"=>"54.0", "locatable"=>"Massachusetts (State)", "year"=>1990}
42
+ ga_1990_01 = {"month"=>1, "cost"=>"50.7", "locatable"=>"Georgia (State)", "year"=>1990}
43
+
44
+ t = RemoteTable.new(:url => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
45
+ :transform => { :class => FuelOilParser })
46
+ assert t.rows.include?(ma_1990_01)
47
+ assert t.rows.include?(ga_1990_01)
48
+ end
49
+ end
data/test/test_remote_table.rb ADDED
@@ -0,0 +1,13 @@
1
+ require 'helper'
2
+
3
+ class TestRemoteTable < Test::Unit::TestCase
4
+ should "open an XLSX" do
5
+ t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
6
+ assert_equal "Secure encryption of all data", t[5]["Requirements"]
7
+ end
8
+
9
+ should "add a row hash to every row" do
10
+ t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
11
+ assert_equal "59d68cfc1cd6b32f5b333d6f0e4bea6d", t[5]['row_hash']
12
+ end
13
+ end