remote_table 0.2.32 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. data/CHANGELOG +5 -0
  2. data/Gemfile +4 -0
  3. data/Gemfile.lock +65 -0
  4. data/LICENSE +1 -1
  5. data/README.rdoc +21 -7
  6. data/Rakefile +12 -61
  7. data/lib/remote_table/cleaner.rb +19 -0
  8. data/lib/remote_table/executor.rb +29 -0
  9. data/lib/remote_table/format/delimited.rb +62 -0
  10. data/lib/remote_table/format/excel.rb +10 -0
  11. data/lib/remote_table/format/excelx.rb +10 -0
  12. data/lib/remote_table/format/fixed_width.rb +47 -0
  13. data/lib/remote_table/format/html.rb +43 -0
  14. data/lib/remote_table/format/mixins/rooable.rb +47 -0
  15. data/lib/remote_table/format/mixins/textual.rb +34 -0
  16. data/lib/remote_table/format/open_office.rb +10 -0
  17. data/lib/remote_table/format.rb +35 -0
  18. data/lib/remote_table/hasher.rb +25 -0
  19. data/lib/remote_table/local_file.rb +92 -0
  20. data/lib/remote_table/properties.rb +209 -0
  21. data/lib/remote_table/transformer.rb +17 -0
  22. data/lib/remote_table/version.rb +3 -0
  23. data/lib/remote_table.rb +91 -99
  24. data/remote_table.gemspec +32 -77
  25. data/test/{test_helper.rb → helper.rb} +9 -2
  26. data/test/test_big.rb +61 -0
  27. data/test/test_errata.rb +46 -0
  28. data/test/test_old_syntax.rb +229 -0
  29. data/test/test_old_transform.rb +49 -0
  30. data/test/test_remote_table.rb +13 -0
  31. metadata +176 -53
  32. data/VERSION +0 -1
  33. data/lib/remote_table/file/csv.rb +0 -49
  34. data/lib/remote_table/file/fixed_width.rb +0 -19
  35. data/lib/remote_table/file/html.rb +0 -37
  36. data/lib/remote_table/file/ods.rb +0 -11
  37. data/lib/remote_table/file/roo_spreadsheet.rb +0 -44
  38. data/lib/remote_table/file/xls.rb +0 -11
  39. data/lib/remote_table/file/xlsx.rb +0 -11
  40. data/lib/remote_table/file.rb +0 -100
  41. data/lib/remote_table/package.rb +0 -89
  42. data/lib/remote_table/request.rb +0 -44
  43. data/lib/remote_table/transform.rb +0 -58
  44. data/test/remote_table_test.rb +0 -386
data/test/test_big.rb ADDED
@@ -0,0 +1,61 @@
1
+ require 'helper'
2
+
3
# Tests that open tables from public URLs: zipped XLS, CSV and fixed-width
# files, plus a download that requires POSTing form data.
class TestBig < Test::Unit::TestCase
  should "open an XLS inside a zip file" do
    t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
    assert_equal 'ACURA', t.rows.first['Manufacturer']
    assert_equal 'NSX', t.rows.first['carline name']
    assert_equal 'VOLVO', t.rows.last['Manufacturer']
    assert_equal 'V70 XC AWD', t.rows.last['carline name']
  end

  should "not have indifferent string/symbol hash access" do
    t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
    assert_equal 'ACURA', t.rows.first['Manufacturer']
    # The Symbol lookup must miss even though the String key is present.
    assert_nil t.rows.first[:Manufacturer]
  end

  should "open a CSV inside a zip file" do
    t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv')
    assert_equal 'ACURA', t.rows.first['Manufacturer']
    assert_equal 'NSX', t.rows.first['carline name']
    assert_equal 'TOYOTA', t.rows.last['Manufacturer']
    assert_equal 'RAV4 SOFT TOP 4WD', t.rows.last['carline name']
  end

  should "open a fixed-width file with an inline schema inside a zip file" do
    t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
                        :filename => 'Gd6-dsc.txt',
                        :format => :fixed_width,
                        :crop => 21..26, # inclusive
                        :cut => '2-',
                        :select => lambda { |row| /\A[A-Z]/.match row['code'] },
                        :schema => [[ 'code', 2, { :type => :string } ],
                                    [ 'spacer', 2 ],
                                    [ 'name', 52, { :type => :string } ]])
    assert_equal 'regular grade gasoline (octane number of 87)', t.rows.first['name']
    assert_equal 'R', t.rows.first['code']
    assert_equal 'electricity', t.rows.last['name']
    assert_equal 'El', t.rows.last['code']
  end

  should "send form data, follow redirects and use a filename glob" do
    url = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
    # Must stay one unbroken literal: a line break inside the quotes would
    # become part of the form body that is sent.
    form_data = 'UserTableName=T_100_Segment__All_Carriers&DBShortName=Air_Carriers&RawDataTable=T_T100_SEGMENT_ALL_CARRIER&sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D1+AND+YEAR%3D2008&varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&grouplist=&suml=&sumRegion=&filter1=title%3D&filter2=title%3D&geo=All%A0&time=January&timename=Month&GEOGRAPHY=All&XYEAR=2008&FREQUENCY=1&AllVars=All&VarName=DEPARTURES_SCHEDULED&VarDesc=DepScheduled&VarType=Num&VarName=DEPARTURES_PERFORMED&VarDesc=DepPerformed&VarType=Num&VarName=PAYLOAD&VarDesc=Payload&VarType=Num&VarName=SEATS&VarDesc=Seats&VarType=Num&VarName=PASSENGERS&VarDesc=Passengers&VarType=Num&VarName=FREIGHT&VarDesc=Freight&VarType=Num&VarName=MAIL&VarDesc=Mail&VarType=Num&VarName=DISTANCE&VarDesc=Distance&VarType=Num&VarName=RAMP_TO_RAMP&VarDesc=RampToRamp&VarType=Num&VarName=AIR_TIME&VarDesc=AirTime&VarType=Num&VarName=UNIQUE_CARRIER&VarDesc=UniqueCarrier&VarType=Char&VarName=AIRLINE_ID&VarDesc=AirlineID&VarType=Num&VarName=UNIQUE_CARRIER_NAME&VarDesc=UniqueCarrierName&VarType=Char&VarName=UNIQUE_CARRIER_ENTITY&VarDesc=UniqCarrierEntity&VarType=Char&VarName=REGION&VarDesc=CarrierRegion&VarType=Char&VarName=CARRIER&VarDesc=Carrier&VarType=Char&VarName=CARRIER_NAME&VarDesc=CarrierName&VarType=Char&VarName=CARRIER_GROUP&VarDesc=CarrierGroup&VarType=Num&VarName=CARRIER_GROUP_NEW&VarDesc=CarrierGroupNew&VarType=Num&VarName=ORIGIN&VarDesc=Origin&VarType=Char&VarName=ORIGIN_CITY_NAME&VarDesc=OriginCityName&VarType=Char&VarName=ORIGIN_CITY_NUM&VarDesc=OriginCityNum&VarType=Num&VarName=ORIGIN_STATE_ABR&VarDesc=OriginState&VarType=Char&VarName=ORIGIN_STATE_FIPS&VarDesc=OriginStateFips&VarType=Char&VarName=ORIGIN_STATE_NM&VarDesc=OriginStateName&VarType=Char&VarName=ORIGIN_COUNTRY&VarDesc=OriginCountry&VarType=Char&VarName=ORIGIN_COUNTRY_NAME&VarDesc=OriginCountryName&VarType=Char&VarName=ORIGIN_WAC&VarDesc=OriginWac&VarType=Num&VarName=DEST&VarDesc=Dest&VarType=Char&VarName=DEST_CITY_NAME&VarDesc=DestCityName&VarType=Char&VarName=DEST_CITY_NUM&VarDesc=DestCityNum&VarType=Num&VarName=DEST_STATE_ABR&VarDesc=DestState&VarType=Char&VarName=DEST_STATE_FIPS&VarDesc=DestStateFips&VarType=Char&VarName=DEST_STATE_NM&VarDesc=DestStateName&VarType=Char&VarName=DEST_COUNTRY&VarDesc=DestCountry&VarType=Char&VarName=DEST_COUNTRY_NAME&VarDesc=DestCountryName&VarType=Char&VarName=DEST_WAC&VarDesc=DestWac&VarType=Num&VarName=AIRCRAFT_GROUP&VarDesc=AircraftGroup&VarType=Num&VarName=AIRCRAFT_TYPE&VarDesc=AircraftType&VarType=Char&VarName=AIRCRAFT_CONFIG&VarDesc=AircraftConfig&VarType=Num&VarName=YEAR&VarDesc=Year&VarType=Num&VarName=QUARTER&VarDesc=Quarter&VarType=Num&VarName=MONTH&VarDesc=Month&VarType=Num&VarName=DISTANCE_GROUP&VarDesc=DistanceGroup&VarType=Num&VarName=CLASS&VarDesc=Class&VarType=Char&VarName=DATA_SOURCE&VarDesc=DataSource&VarType=Char'
    t = RemoteTable.new :url => url, :form_data => form_data, :compression => :zip, :glob => '/*.csv'
    assert_equal 'United States of America', t.rows.first['DEST_COUNTRY_NAME']
  end

  # Disabled test, kept as found.
  # should "provide a row_hash on demand" do
  #   t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
  #                       :filename => 'Gd6-dsc.txt',
  #                       :format => :fixed_width,
  #                       :crop => 21..26, # inclusive
  #                       :cut => '2-',
  #                       :select => lambda { |row| /\A[A-Z]/.match row['code'] },
  #                       :schema => [[ 'code', 2, { :type => :string } ],
  #                                   [ 'spacer', 2 ],
  #                                   [ 'name', 52, { :type => :string } ]])
  #   assert_equal 'a8a5d7f17b56772723c657eb62b0f238', t.rows.first['row_hash']
  # end
end
data/test/test_errata.rb ADDED
@@ -0,0 +1,46 @@
1
+ require 'helper'
2
+ require 'errata'
3
+
4
+ class AircraftGuru
5
+ def is_a_dc_plane?(row)
6
+ row['Designator'] =~ /^DC\d/i
7
+ end
8
+
9
+ # def is_a_crj_900?(row)
10
+ # row['Designator'].downcase == 'crj9'
11
+ # end
12
+
13
+ def is_a_g159?(row)
14
+ row['Designator'] =~ /^G159$/
15
+ end
16
+
17
+ def is_a_galx?(row)
18
+ row['Designator'] =~ /^GALX$/
19
+ end
20
+
21
+ def method_missing(method_id, *args, &block)
22
+ if method_id.to_s =~ /\Ais_n?o?t?_?attributed_to_([^\?]+)/
23
+ manufacturer_name = $1
24
+ manufacturer_regexp = Regexp.new(manufacturer_name.gsub('_', ' ?'), Regexp::IGNORECASE)
25
+ matches = manufacturer_regexp.match(args.first['Manufacturer']) # row['Manufacturer'] =~ /mcdonnell douglas/i
26
+ method_id.to_s.include?('not_attributed') ? matches.nil? : !matches.nil?
27
+ else
28
+ super
29
+ end
30
+ end
31
+ end
32
+
33
# Checks that a table built with an :errata option returns corrected rows.
class TestErrata < Test::Unit::TestCase
  should "be able to apply errata files" do
    # The errata are themselves loaded through RemoteTable and answered by
    # an AircraftGuru responder.
    errata_table = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw')
    errata = Errata.new(:table => errata_table, :responder => AircraftGuru.new)

    t = RemoteTable.new(:url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
                        :encoding => 'windows-1252',
                        :row_xpath => '//table/tr[2]/td/table/tr',
                        :column_xpath => 'td',
                        :errata => errata)

    g1 = t.rows.detect { |row| row['Model'] =~ /Gulfstream I/ }
    assert g1
    assert_equal 'GULFSTREAM AEROSPACE', g1['Manufacturer']
    assert_equal 'Gulfstream I', g1['Model']
  end
end
data/test/test_old_syntax.rb ADDED
@@ -0,0 +1,229 @@
1
+ require 'helper'
2
+
3
# Expected rows for the test2.* fixtures once blank rows are dropped.
$test2_rows = [
  { 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
  { 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' }
].freeze

# Expected rows when blank rows are kept: every data row is preceded by a row
# of empty strings. Rows are copied so the two fixtures share no hash objects.
$test2_rows_with_blanks = $test2_rows.inject([]) do |rows, row|
  rows << { 'header4' => '', 'header5' => '', 'header6' => '' } << row.dup
end.freeze
15
+
16
# Tests written against the options-hash calling style (`:url => ...`) and the
# `t.rows` accessor, covering XLSX, CSV, ODS, XLS, HTML and fixed-width inputs.
class TestOldSyntax < Test::Unit::TestCase
  should "open an XLSX like an array (numbered columns)" do
    t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => false)
    assert_equal "Secure encryption of all data", t.rows[5][0]
  end

  should "open an XLSX with custom headers" do
    t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => %w{foo bar baz})
    assert_equal "Secure encryption of all data", t.rows[5]['foo']
  end

  should "open an XLSX" do
    t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
    assert_equal "Secure encryption of all data", t.rows[5]["Requirements"]
  end

  should "work on filenames with spaces, using globbing" do
    t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
    assert_equal 'ASTON MARTIN', t.rows.first['MFR']
  end

  should "work on filenames with spaces" do
    t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
    assert_equal 'ASTON MARTIN', t.rows.first['MFR']
  end

  should "ignore UTF-8 byte order marks" do
    t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
    assert_equal 'Tawleed', t.rows.first['name']
  end

  # this will die with an error about libcurl if your curl doesn't support ssl
  should "connect using HTTPS if available" do
    t = RemoteTable.new(:url => 'https://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
    assert_equal 'Gulf Coast', t.rows.first['PAD district name']
    assert_equal 'AL', t.rows.first['State']
    assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
    assert_equal 'WY', t.rows.last['State']
  end

  should "read an HTML table made with frontpage" do
    t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-E.htm",
                        :encoding => 'US-ASCII',
                        :row_xpath => '//table/tr[2]/td/table/tr',
                        :column_xpath => 'td'
    assert_equal 'E110', t.rows.first['Designator']
    assert_equal 'EMBRAER', t.rows.first['Manufacturer']
    assert_equal 'EZKC', t.rows.last['Designator']
    assert_equal 'EZ King Cobra', t.rows.last['Model']
  end

  should "hash rows without paying attention to order" do
    x = ActiveSupport::OrderedHash.new
    x[:a] = 1
    x[:b] = 2

    y = ActiveSupport::OrderedHash.new
    y[:b] = 2
    y[:a] = 1

    # Same pairs, different insertion order: the serialized forms differ...
    assert_not_equal Marshal.dump(x), Marshal.dump(y)
    # ...but the row hash must not.
    # NOTE(review): this release's file list shows lib/remote_table/transform.rb
    # removed and lib/remote_table/hasher.rb added - confirm that
    # RemoteTable::Transform.row_hash still resolves.
    assert_equal RemoteTable::Transform.row_hash(x), RemoteTable::Transform.row_hash(y)
  end

  should "open a Google Docs url (as a CSV)" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
    assert_equal 'Gulf Coast', t.rows.first['PAD district name']
    assert_equal 'AL', t.rows.first['State']
    assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
    assert_equal 'WY', t.rows.last['State']
  end

  should "open a Google Docs url (as a CSV, with sheet options)" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA&single=true&gid=0')
    assert_equal 'Gulf Coast', t.rows.first['PAD district name']
    assert_equal 'AL', t.rows.first['State']
    assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
    assert_equal 'WY', t.rows.last['State']
  end

  should "open a Google Docs url as a CSV without headers" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
    assert_equal 'AL', t.rows.first[0]
    assert_equal 'Gulf Coast', t.rows.first[4]
    assert_equal 'WY', t.rows.last[0]
    assert_equal 'Rocky Mountain', t.rows.last[4]
  end

  should "take the last of values if the header is duplicated" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg')
    assert_equal '2', t.rows.first['dup_header']
  end

  should "respect field order in CSVs without headers" do
    t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
    t.rows.each do |row|
      last_column_number = -1
      row.each do |column_number, _value|
        # 'row_hash' is the one non-numeric key allowed in a headerless row.
        next if column_number == 'row_hash'
        assert column_number.is_a?(Numeric)
        assert(column_number > last_column_number)
        last_column_number = column_number
      end
    end
  end

  # One pair of tests per format. The blocks close over `format`, so no
  # string eval is needed to build the test names or URLs.
  %w{ csv ods xls }.each do |format|
    should "read #{format}" do
      t = RemoteTable.new(:url => "http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}")
      # no blank headers
      assert t.rows.all? { |row| row.keys.all?(&:present?) }
      # correct values
      t.rows.each_with_index do |row, index|
        assert_equal $test2_rows[index], row.except('row_hash')
      end
    end

    should "read #{format}, keeping blank rows" do
      t = RemoteTable.new(:url => "http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}", :keep_blank_rows => true)
      # no blank headers
      assert t.rows.all? { |row| row.keys.all?(&:present?) }
      # correct values
      t.rows.each_with_index do |row, index|
        assert_equal $test2_rows_with_blanks[index], row.except('row_hash')
      end
    end
  end

  should "read fixed width correctly" do
    t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
                        :format => :fixed_width,
                        :skip => 1,
                        :schema => [[ 'header4', 10, { :type => :string } ],
                                    [ 'spacer',  1 ],
                                    [ 'header5', 10, { :type => :string } ],
                                    [ 'spacer',  12 ],
                                    [ 'header6', 10, { :type => :string } ]])

    # no blank headers
    assert t.rows.all? { |row| row.keys.all?(&:present?) }
    # correct values
    t.rows.each_with_index do |row, index|
      assert_equal $test2_rows[index], row.except('row_hash')
    end
  end

  should "read fixed width correctly, keeping blank rows" do
    t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
                        :format => :fixed_width,
                        :keep_blank_rows => true,
                        :skip => 1,
                        :schema => [[ 'header4', 10, { :type => :string } ],
                                    [ 'spacer',  1 ],
                                    [ 'header5', 10, { :type => :string } ],
                                    [ 'spacer',  12 ],
                                    [ 'header6', 10, { :type => :string } ]])

    # no blank headers
    assert t.rows.all? { |row| row.keys.all?(&:present?) }
    # correct values
    t.rows.each_with_index do |row, index|
      assert_equal $test2_rows_with_blanks[index], row.except('row_hash')
    end
  end

  should "have the same row hash across formats" do
    csv = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv')
    ods = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods')
    xls = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls')
    fixed_width = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
                                  :format => :fixed_width,
                                  :skip => 1,
                                  :schema => [[ 'header1', 10, { :type => :string } ],
                                              [ 'spacer',  1 ],
                                              [ 'header2', 10, { :type => :string } ],
                                              [ 'spacer',  12 ],
                                              [ 'header3', 10, { :type => :string } ]])

    csv2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv')
    ods2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods')
    xls2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls')
    fixed_width2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
                                   :format => :fixed_width,
                                   :skip => 1,
                                   :schema => [[ 'spacer',  11 ],
                                               [ 'header2', 10, { :type => :string } ],
                                               [ 'spacer',  1 ],
                                               [ 'header3', 10, { :type => :string } ],
                                               [ 'spacer',  1 ],
                                               [ 'header1', 10, { :type => :string } ]])

    reference = csv.rows[0]['row_hash']

    # same row hashes
    assert_equal reference, ods.rows[0]['row_hash']
    assert_equal reference, xls.rows[0]['row_hash']
    assert_equal reference, fixed_width.rows[0]['row_hash']
    # same row hashes with different order
    assert_equal reference, csv2.rows[0]['row_hash']
    assert_equal reference, ods2.rows[0]['row_hash']
    assert_equal reference, xls2.rows[0]['row_hash']
    assert_equal reference, fixed_width2.rows[0]['row_hash']
  end

  should "open an ODS" do
    t = RemoteTable.new(:url => 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true)

    assert_equal 'Central Africa', t.rows[5]['name']
    assert_equal 99, t.rows[5]['MAP DATA population (millions) 2002'].to_i
  end
end
data/test/test_old_transform.rb ADDED
@@ -0,0 +1,49 @@
1
+ require 'helper'
2
+
3
# Transform class used by TestOldTransform (passed as `:transform => { :class => ... }`).
# It turns one wide row, with one "<location> Residual Fuel Oil" column per
# location, into one hash per location that has a price.
#
# Relies on `blank?` being available on row values and on `Time.parse`
# (stdlib 'time') being loaded.
# NOTE(review): presumably both come in through 'helper' - confirm.
class FuelOilParser
  # Price column headers; capture 1 is the location label.
  LOCATION_COLUMN = /(.+) Residual Fuel Oil/
  # District code inside a label such as "East Coast (PADD 1A)".
  PADD_LABEL = /\(PADD (.*)\)/

  # @param options [Hash] accepted and ignored
  def initialize(options = {})
    # nothing
  end

  # Writes the :sheet, :skip and :select settings this parser needs into +bus+.
  #
  # @param bus [#[]=] settings container, mutated in place
  def add_hints!(bus)
    bus[:sheet] = 'Data 1'
    bus[:skip] = 2
    bus[:select] = lambda { |row| row['year'] > 1989 }
  end

  # Expands one source row into per-location rows.
  #
  # Columns with a blank price are skipped, and a row with a blank 'Date'
  # yields nothing. "PADD 1" is skipped, and so is a PADD label with no
  # parseable "(PADD x)" part (previously emitted with an empty district name).
  #
  # @param row [Hash] source row keyed by column header, including 'Date'
  # @return [Array<Hash>] hashes with 'locatable', 'cost', 'year', 'month'
  def apply(row)
    date = row['Date']
    return [] if date.blank?

    parsed_date = nil
    virtual_rows = []
    row.keys.grep(LOCATION_COLUMN).each do |column_name|
      cost = row[column_name]
      next if cost.blank?

      locatable = locatable_for(column_name[LOCATION_COLUMN, 1])
      next unless locatable

      # Parsed on first use so a row with no priced columns never parses its date.
      parsed_date ||= Time.parse(date)
      virtual_rows << {
        'locatable' => locatable,
        'cost' => cost,
        'year' => parsed_date.year,
        'month' => parsed_date.month
      }
    end
    virtual_rows
  end

  private

  # Maps a column's location label to its "name (Type)" string, or nil when
  # the column should be skipped.
  def locatable_for(location)
    if location.start_with?('U.S.')
      'united_states (Country)'
    elsif location.include?('PADD')
      district = location[PADD_LABEL, 1]
      # skip PADD 1 because we always prefer subdistricts
      return nil if district.nil? || district == '1'

      "#{district} (PetroleumAdministrationForDefenseDistrict)"
    else
      "#{location} (State)"
    end
  end
end
38
+
39
# Checks that a :transform class can reshape spreadsheet rows.
class TestOldTransform < Test::Unit::TestCase
  should "open an XLS with a parser" do
    t = RemoteTable.new(:url => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
                        :transform => { :class => FuelOilParser })

    # January 1990 rows that must appear among the transformed rows.
    expected_rows = [
      { "month" => 1, "cost" => "54.0", "locatable" => "Massachusetts (State)", "year" => 1990 },
      { "month" => 1, "cost" => "50.7", "locatable" => "Georgia (State)", "year" => 1990 }
    ]
    expected_rows.each do |expected_row|
      assert t.rows.include?(expected_row)
    end
  end
end
data/test/test_remote_table.rb ADDED
@@ -0,0 +1,13 @@
1
+ require 'helper'
2
+
3
# Tests for the 1.0.0 calling style: a bare URL argument and direct `t[index]` access.
class TestRemoteTable < Test::Unit::TestCase
  should "open an XLSX" do
    t = RemoteTable.new('www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
    sixth_row = t[5]
    assert_equal('Secure encryption of all data', sixth_row['Requirements'])
  end

  should "add a row hash to every row" do
    t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
    sixth_row = t[5]
    assert_equal('59d68cfc1cd6b32f5b333d6f0e4bea6d', sixth_row['row_hash'])
  end
end
13
+ end