data_miner 0.5.5 → 0.5.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -84,6 +84,7 @@ module DataMiner
84
84
  return value if value.is_a? ActiveRecord::Base # carry through trapdoor
85
85
  value = value_in_dictionary value if wants_dictionary?
86
86
  value = synthesize.call(row) if wants_synthesize?
87
+ value = nil if value.blank? and wants_nullification?
87
88
  value
88
89
  end
89
90
 
@@ -150,7 +151,7 @@ module DataMiner
150
151
  options.has_key? :static
151
152
  end
152
153
  def wants_nullification?
153
- nullify != false
154
+ nullify == true
154
155
  end
155
156
  def wants_chars?
156
157
  chars.present?
@@ -0,0 +1,55 @@
1
+ require 'test_helper'
2
+
3
+ class AttributeTest < Test::Unit::TestCase
4
+ context '#value_from_row' do
5
+ setup do
6
+ @airport = Airport.new
7
+ end
8
+ context 'nullify is true' do
9
+ setup do
10
+ @attribute = DataMiner::Attribute.new @airport, 'latitude', :nullify => true
11
+ end
12
+ should 'return nil if field is blank' do
13
+ assert_nil @attribute.value_from_row(
14
+ 'name' => 'DTW',
15
+ 'city' => 'Warren',
16
+ 'country_name' => 'US',
17
+ 'latitude' => '',
18
+ 'longitude' => ''
19
+ )
20
+ end
21
+ should 'return the value if field is not blank' do
22
+ assert_equal '12.34', @attribute.value_from_row(
23
+ 'name' => 'DTW',
24
+ 'city' => 'Warren',
25
+ 'country_name' => 'US',
26
+ 'latitude' => '12.34',
27
+ 'longitude' => ''
28
+ )
29
+ end
30
+ end
31
+ context 'nullify is false' do
32
+ setup do
33
+ @attribute = DataMiner::Attribute.new @airport, 'latitude'
34
+ end
35
+ should 'return the value if field is not blank' do
36
+ assert_equal '12.34', @attribute.value_from_row(
37
+ 'name' => 'DTW',
38
+ 'city' => 'Warren',
39
+ 'country_name' => 'US',
40
+ 'latitude' => '12.34',
41
+ 'longitude' => ''
42
+ )
43
+ end
44
+ should 'return blank if field is blank' do
45
+ assert_equal '', @attribute.value_from_row(
46
+ 'name' => 'DTW',
47
+ 'city' => 'Warren',
48
+ 'country_name' => 'US',
49
+ 'latitude' => '',
50
+ 'longitude' => ''
51
+ )
52
+ end
53
+ end
54
+ end
55
+ end
@@ -1,446 +1,5 @@
1
1
  require 'test_helper'
2
2
 
3
- class AutomobileFuelType < ActiveRecord::Base
4
- set_primary_key :code
5
-
6
- data_miner do
7
- import(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
8
- :filename => 'Gd6-dsc.txt',
9
- :format => :fixed_width,
10
- :crop => 21..26, # inclusive
11
- :cut => '2-',
12
- :select => lambda { |row| /\A[A-Z]/.match row[:code] },
13
- :schema => [[ 'code', 2, { :type => :string } ],
14
- [ 'spacer', 2 ],
15
- [ 'name', 52, { :type => :string } ]]) do
16
- key 'code'
17
- store 'name'
18
- end
19
-
20
- import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/models_export/automobile_fuel_type.csv' do
21
- key 'code'
22
- store 'name'
23
- store 'annual_distance'
24
- store 'emission_factor'
25
- end
26
-
27
- # pull electricity emission factor from residential electricity
28
- import(:url => 'http://spreadsheets.google.com/pub?key=rukxnmuhhsOsrztTrUaFCXQ',
29
- :select => lambda { |row| row['code'] == 'El' }) do
30
- key 'code'
31
- store 'name'
32
- store 'emission_factor'
33
- end
34
-
35
- # still need distance estimate for electric cars
36
- end
37
-
38
- CODES = {
39
- :electricity => 'El',
40
- :diesel => 'D'
41
- }
42
- end
43
-
44
- class AutomobileVariant < ActiveRecord::Base
45
- set_primary_key :row_hash
46
-
47
- module FuelEconomyGuide
48
- TRANSMISSIONS = {
49
- 'A' => 'automatic',
50
- 'M' => 'manual',
51
- 'L' => 'automatic', # Lockup/automatic
52
- 'S' => 'semiautomatic', # Semiautomatic
53
- 'C' => 'manual' # TODO verify for VW Syncro
54
- }
55
-
56
- ENGINE_TYPES = {
57
- '(GUZZLER)' => nil, # "gas guzzler"
58
- '(POLICE)' => nil, # police automobile_variant
59
- '(MPFI)' => 'injection',
60
- '(MPI*)' => 'injection',
61
- '(SPFI)' => 'injection',
62
- '(FFS)' => 'injection',
63
- '(TURBO)' => 'turbo',
64
- '(TRBO)' => 'turbo',
65
- '(TC*)' => 'turbo',
66
- '(FFS,TRBO)' => %w(injection turbo),
67
- '(S-CHARGE)' => 'supercharger',
68
- '(SC*)' => 'supercharger',
69
- '(DIESEL)' => nil, # diesel
70
- '(DSL)' => nil, # diesel
71
- '(ROTARY)' => nil, # rotary
72
- '(VARIABLE)' => nil, # variable displacement
73
- '(NO-CAT)' => nil, # no catalytic converter
74
- '(OHC)' => nil, # overhead camshaft
75
- '(OHV)' => nil, # overhead valves
76
- '(16-VALVE)' => nil, # 16V
77
- '(305)' => nil, # 305 cubic inch displacement
78
- '(307)' => nil, # 307 cubic inch displacement
79
- '(M-ENG)' => nil,
80
- '(W-ENG)' => nil,
81
- '(GM-BUICK)' => nil,
82
- '(GM-CHEV)' => nil,
83
- '(GM-OLDS)' => nil,
84
- '(GM-PONT)' => nil,
85
- }
86
-
87
- class ParserB
88
- attr_accessor :year
89
- def initialize(options = {})
90
- @year = options[:year]
91
- end
92
-
93
- def apply(row)
94
- row.merge!({
95
- 'make' => row['carline_mfr_name'], # make it line up with the errata
96
- 'model' => row['carline_name'], # ditto
97
- 'transmission' => TRANSMISSIONS[row['model_trans'][0, 1]],
98
- 'speeds' => (row['model_trans'][1, 1] == 'V') ? 'variable' : row['model_trans'][1, 1],
99
- 'turbo' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('turbo'),
100
- 'supercharger' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('supercharger'),
101
- 'injection' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('injection'),
102
- 'displacement' => _displacement(row['opt_disp']),
103
- 'year' => year
104
- })
105
- row
106
- end
107
-
108
- def _displacement(str)
109
- str = str.gsub(/[\(\)]/, '').strip
110
- if str =~ /^(.+)L$/
111
- $1.to_f
112
- elsif str =~ /^(.+)CC$/
113
- $1.to_f / 1000
114
- end
115
- end
116
-
117
- def add_hints!(bus)
118
- bus[:format] = :fixed_width
119
- bus[:cut] = '13-' if year == 1995
120
- bus[:schema_name] = :fuel_economy_guide_b
121
- bus[:select] = lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' }
122
- Slither.define :fuel_economy_guide_b do |d|
123
- d.rows do |row|
124
- row.trap { true } # there's only one section
125
- row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR
126
- row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA
127
- row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE
128
- row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE
129
- row.column 'carline_name' , 28, :type => :string # CARLINE NAME
130
- row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES
131
- row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
132
- row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE
133
- row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS
134
- row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
135
- row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
136
- row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE
137
- row.column 'carline_code' , 5, :type => :integer # CARLINE CODE
138
- row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX
139
- row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME
140
- row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
141
- row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
142
- row.spacer 2
143
- row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
144
- row.spacer 2
145
- row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON
146
- row.spacer 2
147
- row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON
148
- row.spacer 2
149
- row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
150
- row.spacer 2
151
- row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
152
- row.spacer 2
153
- row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
154
- row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT
155
- row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1
156
- row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2
157
- row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3
158
- row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
159
- row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
160
- row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
161
- row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY
162
- row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
163
- row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
164
- row.column 'filler' , 1, :type => :string # NOT USED
165
- row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
166
- row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS
167
- end
168
- end
169
- end
170
- end
171
- class ParserC
172
- attr_accessor :year
173
- def initialize(options = {})
174
- @year = options[:year]
175
- end
176
-
177
- def add_hints!(bus)
178
- # File will decide format based on filename
179
- end
180
-
181
- def apply(row)
182
- row.merge!({
183
- 'make' => row['Manufacturer'], # make it line up with the errata
184
- 'model' => row['carline name'], # ditto
185
- 'drive' => row['drv'] + 'WD',
186
- 'transmission' => TRANSMISSIONS[row['trans'][-3, 1]],
187
- 'speeds' => (row['trans'][-2, 1] == 'V') ? 'variable' : row['trans'][-2, 1],
188
- 'turbo' => row['T'] == 'T',
189
- 'supercharger' => row['S'] == 'S',
190
- 'injection' => true,
191
- 'year' => year
192
- })
193
- row
194
- end
195
- end
196
- class ParserD
197
- attr_accessor :year
198
- def initialize(options = {})
199
- @year = options[:year]
200
- end
201
-
202
- def add_hints!(bus)
203
- bus[:reject] = lambda { |row| row.values.first.blank? } if year == 2007
204
- end
205
-
206
- def apply(row)
207
- row.merge!({
208
- 'make' => row['MFR'], # make it line up with the errata
209
- 'model' => row['CAR LINE'], # ditto
210
- 'drive' => row['DRIVE SYS'] + 'WD',
211
- 'transmission' => TRANSMISSIONS[row['TRANS'][-3, 1]],
212
- 'speeds' => (row['TRANS'][-2, 1] == 'V') ? 'variable' : row['TRANS'][-2, 1],
213
- 'turbo' => row['TURBO'] == 'T',
214
- 'supercharger' => row['SPCHGR'] == 'S',
215
- 'injection' => true,
216
- 'year' => year
217
- })
218
- row
219
- end
220
- end
221
- end
222
-
223
- class Guru
224
- # the following matching methods are needed by the errata
225
- # per https://brighterplanet.sifterapp.com/projects/30/issues/750/comments
226
-
227
- def transmission_is_blank?(row)
228
- row['transmission'].blank?
229
- end
230
-
231
- def is_a_2007_gmc_or_chevrolet?(row)
232
- row['year'] == 2007 and %w(GMC CHEVROLET).include? row['MFR'].upcase
233
- end
234
-
235
- def is_a_porsche?(row)
236
- row['make'].upcase == 'PORSCHE'
237
- end
238
-
239
- def is_not_a_porsche?(row)
240
- !is_a_porsche? row
241
- end
242
-
243
- def is_a_mercedes_benz?(row)
244
- row['make'] =~ /MERCEDES/i
245
- end
246
-
247
- def is_a_lexus?(row)
248
- row['make'].upcase == 'LEXUS'
249
- end
250
-
251
- def is_a_bmw?(row)
252
- row['make'].upcase == 'BMW'
253
- end
254
-
255
- def is_a_ford?(row)
256
- row['make'].upcase == 'FORD'
257
- end
258
-
259
- def is_a_rolls_royce_and_model_contains_bentley?(row)
260
- is_a_rolls_royce?(row) and model_contains_bentley?(row)
261
- end
262
-
263
- def is_a_bentley?(row)
264
- row['make'].upcase == 'BENTLEY'
265
- end
266
-
267
- def is_a_rolls_royce?(row)
268
- row['make'] =~ /ROLLS/i
269
- end
270
-
271
- def is_a_turbo_brooklands?(row)
272
- row['model'] =~ /TURBO R\/RL BKLDS/i
273
- end
274
-
275
- def model_contains_maybach?(row)
276
- row['model'] =~ /MAYBACH/i
277
- end
278
-
279
- def model_contains_bentley?(row)
280
- row['model'] =~ /BENTLEY/i
281
- end
282
- end
283
-
284
- errata = Errata.new :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv',
285
- :responder => AutomobileVariant::Guru.new
286
-
287
- data_miner do
288
- # 1985---1997
289
- (85..97).each do |yy|
290
- filename = (yy == 96) ? "#{yy}MFGUI.ASC" : "#{yy}MFGUI.DAT"
291
- import(:url => "http://www.fueleconomy.gov/FEG/epadata/#{yy}mfgui.zip",
292
- :filename => filename,
293
- :transform => { :class => FuelEconomyGuide::ParserB, :year => "19#{yy}".to_i },
294
- :errata => errata) do
295
- key 'row_hash'
296
- store 'make_name', :field_name => 'make'
297
- store 'model_name', :field_name => 'model'
298
- store 'year'
299
- store 'fuel_type_code', :field_name => 'fuel_type'
300
- store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
301
- store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
302
- store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
303
- store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
304
- store 'cylinders', :field_name => 'no_cyc'
305
- store 'drive', :field_name => 'drive_system'
306
- store 'carline_mfr_code'
307
- store 'vi_mfr_code'
308
- store 'carline_code'
309
- store 'carline_class_code', :field_name => 'carline_clss'
310
- store 'transmission'
311
- store 'speeds'
312
- store 'turbo'
313
- store 'supercharger'
314
- store 'injection'
315
- store 'displacement'
316
- end
317
- end
318
-
319
- # 1998--2005
320
- {
321
- 1998 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv' },
322
- 1999 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/99guide.zip', :filename => '99guide6.csv' },
323
- 2000 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls' },
324
- 2001 => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/01guide0918.csv' }, # parseexcel 0.5.2 can't read Excel 5.0 { :url => 'http://www.fueleconomy.gov/FEG/epadata/01data.zip', :filename => '01guide0918.xls' }
325
- 2002 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls' },
326
- 2003 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/03data.zip', :filename => 'guide_2003_feb04-03b.csv' },
327
- 2004 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/04data.zip', :filename => 'gd04-Feb1804-RelDtFeb20.csv' },
328
- 2005 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/05data.zip', :filename => 'guide2005-2004oct15.csv' }
329
- }.sort { |a, b| a.first <=> b.first }.each do |year, options|
330
- import options.merge(:transform => { :class => FuelEconomyGuide::ParserC, :year => year },
331
- :errata => errata) do
332
- key 'row_hash'
333
- store 'make_name', :field_name => 'make'
334
- store 'model_name', :field_name => 'model'
335
- store 'fuel_type_code', :field_name => 'fl'
336
- store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
337
- store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
338
- store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
339
- store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
340
- store 'cylinders', :field_name => 'cyl'
341
- store 'displacement', :field_name => 'displ'
342
- store 'carline_class_code', :field_name => 'cls' if year >= 2000
343
- store 'carline_class_name', :field_name => 'Class'
344
- store 'year'
345
- store 'transmission'
346
- store 'speeds'
347
- store 'turbo'
348
- store 'supercharger'
349
- store 'injection'
350
- store 'drive'
351
- end
352
- end
353
-
354
- # 2006--2010
355
- {
356
- 2006 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/06data.zip', :filename => '2006_FE_Guide_14-Nov-2005_download.csv' },
357
- 2007 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/07data.zip', :filename => '2007_FE_guide_ALL_no_sales_May_01_2007.xls' },
358
- 2008 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv' },
359
- 2009 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/09data.zip', :filename => '2009_FE_guide for DOE_ALL-rel dates-no-sales-8-28-08download.csv' },
360
- # 2010 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/10data.zip', :filename => '2010FEguide-for DOE-rel dates before 10-16-09-no-sales10-8-09public.xls' }
361
- }.sort { |a, b| a.first <=> b.first }.each do |year, options|
362
- import options.merge(:transform => { :class => FuelEconomyGuide::ParserD, :year => year },
363
- :errata => errata) do
364
- key 'row_hash'
365
- store 'make_name', :field_name => 'make'
366
- store 'model_name', :field_name => 'model'
367
- store 'fuel_type_code', :field_name => 'FUEL TYPE'
368
- store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
369
- store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
370
- store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
371
- store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
372
- store 'cylinders', :field_name => 'NUMB CYL'
373
- store 'displacement', :field_name => 'DISPLACEMENT'
374
- store 'carline_class_code', :field_name => 'CLS'
375
- store 'carline_class_name', :field_name => 'CLASS'
376
- store 'year'
377
- store 'transmission'
378
- store 'speeds'
379
- store 'turbo'
380
- store 'supercharger'
381
- store 'injection'
382
- store 'drive'
383
- end
384
- end
385
-
386
- # associate :make, :key => :original_automobile_make_name, :foreign_key => :name
387
- # derive :automobile_model_id # creates models by name
388
- # associate :fuel_type, :key => :original_automobile_fuel_type_code, :foreign_key => :code
389
-
390
- process 'Set adjusted fuel economy' do
391
- update_all 'fuel_efficiency_city = 1 / ((0.003259 / 0.425143707) + (1.1805 / raw_fuel_efficiency_city))'
392
- update_all 'fuel_efficiency_highway = 1 / ((0.001376 / 0.425143707) + (1.3466 / raw_fuel_efficiency_highway))'
393
- end
394
- end
395
-
396
- def name
397
- extra = []
398
- extra << "V#{cylinders}" if cylinders
399
- extra << "#{displacement}L" if displacement
400
- extra << "turbo" if turbo
401
- extra << "FI" if injection
402
- extra << "#{speeds}spd" if speeds.present?
403
- extra << transmission if transmission.present?
404
- extra << "(#{fuel_type.name})" if fuel_type
405
- extra.join(' ')
406
- end
407
-
408
- def fuel_economy_description
409
- [ fuel_efficiency_city, fuel_efficiency_highway ].map { |f| f.kilometres_per_litre.to(:miles_per_gallon).round }.join('/')
410
- end
411
- end
412
-
413
- class Country < ActiveRecord::Base
414
- set_primary_key :iso_3166
415
-
416
- data_miner do
417
- import 'The official ISO country list', :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do
418
- key 'iso_3166', :field_number => 1
419
- store 'name', :field_number => 0
420
- end
421
-
422
- import 'A Princeton dataset with better capitalization', :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do
423
- key 'iso_3166', :field_name => 'country code'
424
- store 'name', :field_name => 'country'
425
- end
426
- end
427
- end
428
-
429
- class Airport < ActiveRecord::Base
430
- set_primary_key :iata_code
431
-
432
- data_miner do
433
- import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do
434
- key 'iata_code', :field_number => 4
435
- store 'name', :field_number => 1
436
- store 'city', :field_number => 2
437
- store 'country_name', :field_number => 3
438
- store 'latitude', :field_number => 6
439
- store 'longitude', :field_number => 7
440
- end
441
- end
442
- end
443
-
444
3
  class TappedAirport < ActiveRecord::Base
445
4
  set_primary_key :iata_code
446
5
 
@@ -947,103 +506,6 @@ class T100FlightSegment < ActiveRecord::Base
947
506
  end
948
507
  end
949
508
 
950
- require 'loose_tight_dictionary'
951
- class Aircraft < ActiveRecord::Base
952
- set_primary_key :icao_code
953
-
954
- def self.bts_dictionary
955
- @_dictionary ||= LooseTightDictionary.new RemoteTable.new(:url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv', :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }),
956
- :tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
957
- :identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
958
- :blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
959
- :left_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Model'] },
960
- :right_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
961
- end
962
-
963
- class BtsAircraftTypeCodeMatcher
964
- def match(left_record)
965
- right_record = Aircraft.bts_dictionary.left_to_right left_record
966
- right_record['Aircraft Type'] if right_record
967
- end
968
- end
969
-
970
- class BtsNameMatcher
971
- def match(left_record)
972
- right_record = Aircraft.bts_dictionary.left_to_right left_record
973
- right_record['Manufacturer'] + ' ' + right_record['Long Name'] if right_record
974
- end
975
- end
976
-
977
- class Guru
978
- # for errata
979
- def is_attributed_to_boeing?(row)
980
- row['Manufacturer'] =~ /BOEING/i
981
- end
982
-
983
- def is_attributed_to_cessna?(row)
984
- row['Manufacturer'] =~ /CESSNA/i
985
- end
986
-
987
- def is_attributed_to_fokker?(row)
988
- row['Manufacturer'] =~ /FOKKER/i
989
- end
990
-
991
- def is_not_attributed_to_aerospatiale?(row)
992
- not row['Manufacturer'] =~ /AEROSPATIALE/i
993
- end
994
-
995
- def is_not_attributed_to_cessna?(row)
996
- not row['Manufacturer'] =~ /CESSNA/i
997
- end
998
-
999
- def is_not_attributed_to_learjet?(row)
1000
- not row['Manufacturer'] =~ /LEAR/i
1001
- end
1002
-
1003
- def is_not_attributed_to_dehavilland?(row)
1004
- not row['Manufacturer'] =~ /DE ?HAVILLAND/i
1005
- end
1006
-
1007
- def is_not_attributed_to_mcdonnell_douglas?(row)
1008
- not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
1009
- end
1010
-
1011
- def is_not_a_dc_plane?(row)
1012
- not row['Model'] =~ /DC/i
1013
- end
1014
-
1015
- def is_a_crj_900?(row)
1016
- row['Designator'].downcase == 'crj9'
1017
- end
1018
- end
1019
-
1020
- data_miner do
1021
- # ('A'..'Z').each do |letter|
1022
- # Note: for the purposes of testing, only importing "D"
1023
- %w{ D }.each do |letter|
1024
- import("ICAO codes starting with letter #{letter} used by the FAA",
1025
- :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
1026
- :encoding => 'US-ASCII',
1027
- :errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
1028
- :responder => Aircraft::Guru.new),
1029
- :row_xpath => '//table/tr[2]/td/table/tr',
1030
- :column_xpath => 'td') do
1031
- key 'icao_code', :field_name => 'Designator'
1032
- store 'bts_name', :matcher => Aircraft::BtsNameMatcher.new
1033
- store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher.new
1034
- store 'manufacturer_name', :field_name => 'Manufacturer'
1035
- store 'name', :field_name => 'Model'
1036
- end
1037
-
1038
- import 'Brighter Planet aircraft class codes',
1039
- :url => 'http://static.brighterplanet.com/science/data/transport/air/bts_aircraft_type/bts_aircraft_types-brighter_planet_aircraft_classes.csv' do
1040
- key 'bts_aircraft_type_code', :field_name => 'bts_aircraft_type'
1041
- store 'brighter_planet_aircraft_class_code'
1042
- end
1043
- end
1044
- end
1045
- end
1046
-
1047
509
  # note that this depends on stuff in Aircraft
1048
510
  class AircraftDeux < ActiveRecord::Base
1049
511
  set_primary_key :icao_code
@@ -1166,7 +628,19 @@ end
1166
628
 
1167
629
  # todo: have somebody properly organize these
1168
630
  class DataMinerTest < Test::Unit::TestCase
1169
- if ENV['ALL'] == 'true' or ENV['NEW'] == 'true'
631
+ if ENV['WIP']
632
+ context 'with nullify option' do
633
+ should 'treat blank fields as null values' do
634
+ Aircraft.delete_all
635
+ Aircraft.data_miner_runs.delete_all
636
+ Aircraft.run_data_miner!
637
+ assert_greater_than 0, Aircraft.count
638
+ assert_false Aircraft.where(:brighter_planet_aircraft_class_code => nil).empty?
639
+ end
640
+ end
641
+ end
642
+
643
+ if ENV['ALL'] == 'true'
1170
644
  should 'directly create a table for the model' do
1171
645
  if AutomobileMakeFleetYear.table_exists?
1172
646
  ActiveRecord::Base.connection.execute 'DROP TABLE automobile_make_fleet_years;'
@@ -0,0 +1,99 @@
1
+ require 'loose_tight_dictionary'
2
+
3
+ class Aircraft < ActiveRecord::Base
4
+ set_primary_key :icao_code
5
+ set_table_name 'aircraft'
6
+
7
+ def self.bts_dictionary
8
+ @_dictionary ||= LooseTightDictionary.new RemoteTable.new(:url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv', :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }),
9
+ :tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
10
+ :identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
11
+ :blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
12
+ :left_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Model'] },
13
+ :right_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
14
+ end
15
+
16
+ class BtsAircraftTypeCodeMatcher
17
+ def match(left_record)
18
+ right_record = Aircraft.bts_dictionary.left_to_right left_record
19
+ right_record['Aircraft Type'] if right_record
20
+ end
21
+ end
22
+
23
+ class BtsNameMatcher
24
+ def match(left_record)
25
+ right_record = Aircraft.bts_dictionary.left_to_right left_record
26
+ right_record['Manufacturer'] + ' ' + right_record['Long Name'] if right_record
27
+ end
28
+ end
29
+
30
+ class Guru
31
+ # for errata
32
+ def is_attributed_to_boeing?(row)
33
+ row['Manufacturer'] =~ /BOEING/i
34
+ end
35
+
36
+ def is_attributed_to_cessna?(row)
37
+ row['Manufacturer'] =~ /CESSNA/i
38
+ end
39
+
40
+ def is_attributed_to_fokker?(row)
41
+ row['Manufacturer'] =~ /FOKKER/i
42
+ end
43
+
44
+ def is_not_attributed_to_aerospatiale?(row)
45
+ not row['Manufacturer'] =~ /AEROSPATIALE/i
46
+ end
47
+
48
+ def is_not_attributed_to_cessna?(row)
49
+ not row['Manufacturer'] =~ /CESSNA/i
50
+ end
51
+
52
+ def is_not_attributed_to_learjet?(row)
53
+ not row['Manufacturer'] =~ /LEAR/i
54
+ end
55
+
56
+ def is_not_attributed_to_dehavilland?(row)
57
+ not row['Manufacturer'] =~ /DE ?HAVILLAND/i
58
+ end
59
+
60
+ def is_not_attributed_to_mcdonnell_douglas?(row)
61
+ not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
62
+ end
63
+
64
+ def is_not_a_dc_plane?(row)
65
+ not row['Model'] =~ /DC/i
66
+ end
67
+
68
+ def is_a_crj_900?(row)
69
+ row['Designator'].downcase == 'crj9'
70
+ end
71
+ end
72
+
73
+ data_miner do
74
+ # ('A'..'Z').each do |letter|
75
+ # Note: for the purposes of testing, only importing "D"
76
+ %w{ D }.each do |letter|
77
+ import("ICAO codes starting with letter #{letter} used by the FAA",
78
+ :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
79
+ :encoding => 'US-ASCII',
80
+ :errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
81
+ :responder => Aircraft::Guru.new),
82
+ :row_xpath => '//table/tr[2]/td/table/tr',
83
+ :column_xpath => 'td') do
84
+ key 'icao_code', :field_name => 'Designator'
85
+ store 'bts_name', :matcher => Aircraft::BtsNameMatcher.new, :nullify => true
86
+ store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher.new, :nullify => true
87
+ store 'manufacturer_name', :field_name => 'Manufacturer', :nullify => true
88
+ store 'name', :field_name => 'Model', :nullify => true
89
+ end
90
+
91
+ import 'Brighter Planet aircraft class codes',
92
+ :url => 'http://static.brighterplanet.com/science/data/transport/air/bts_aircraft_type/bts_aircraft_types-brighter_planet_aircraft_classes.csv' do
93
+ key 'bts_aircraft_type_code', :field_name => 'bts_aircraft_type'
94
+ store 'brighter_planet_aircraft_class_code', :nullify => true
95
+ end
96
+ end
97
+ end
98
+ end
99
+
@@ -0,0 +1,14 @@
1
+ class Airport < ActiveRecord::Base
2
+ set_primary_key :iata_code
3
+
4
+ data_miner do
5
+ import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do
6
+ key 'iata_code', :field_number => 4
7
+ store 'name', :field_number => 1
8
+ store 'city', :field_number => 2
9
+ store 'country_name', :field_number => 3
10
+ store 'latitude', :field_number => 6, :nullify => true
11
+ store 'longitude', :field_number => 7, :nullify => true
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,40 @@
1
+ class AutomobileFuelType < ActiveRecord::Base
2
+ set_primary_key :code
3
+
4
+ data_miner do
5
+ import(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
6
+ :filename => 'Gd6-dsc.txt',
7
+ :format => :fixed_width,
8
+ :crop => 21..26, # inclusive
9
+ :cut => '2-',
10
+ :select => lambda { |row| /\A[A-Z]/.match row[:code] },
11
+ :schema => [[ 'code', 2, { :type => :string } ],
12
+ [ 'spacer', 2 ],
13
+ [ 'name', 52, { :type => :string } ]]) do
14
+ key 'code'
15
+ store 'name'
16
+ end
17
+
18
+ import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/models_export/automobile_fuel_type.csv' do
19
+ key 'code'
20
+ store 'name'
21
+ store 'annual_distance'
22
+ store 'emission_factor'
23
+ end
24
+
25
+ # pull electricity emission factor from residential electricity
26
+ import(:url => 'http://spreadsheets.google.com/pub?key=rukxnmuhhsOsrztTrUaFCXQ',
27
+ :select => lambda { |row| row['code'] == 'El' }) do
28
+ key 'code'
29
+ store 'name'
30
+ store 'emission_factor'
31
+ end
32
+
33
+ # still need distance estimate for electric cars
34
+ end
35
+
36
+ CODES = {
37
+ :electricity => 'El',
38
+ :diesel => 'D'
39
+ }
40
+ end
@@ -0,0 +1,368 @@
1
+ class AutomobileVariant < ActiveRecord::Base
2
+ set_primary_key :row_hash
3
+
4
# Row transformers for the EPA Fuel Economy Guide files. Each parser takes
# a :year option, can add import hints to an options hash (add_hints!), and
# normalizes a raw row in place (apply) so that all eras expose the same
# 'make', 'model', 'transmission', 'speeds', 'turbo', ... keys.
module FuelEconomyGuide
  # Single-letter transmission code => transmission name.
  TRANSMISSIONS = {
    'A' => 'automatic',
    'M' => 'manual',
    'L' => 'automatic',     # Lockup/automatic
    'S' => 'semiautomatic', # Semiautomatic
    'C' => 'manual'         # TODO verify for VW Syncro
  }.freeze

  # Engine descriptor => feature name, list of feature names, or nil when
  # the descriptor carries no turbo/supercharger/injection information.
  ENGINE_TYPES = {
    '(GUZZLER)'  => nil, # "gas guzzler"
    '(POLICE)'   => nil, # police automobile_variant
    '(MPFI)'     => 'injection',
    '(MPI*)'     => 'injection',
    '(SPFI)'     => 'injection',
    '(FFS)'      => 'injection',
    '(TURBO)'    => 'turbo',
    '(TRBO)'     => 'turbo',
    '(TC*)'      => 'turbo',
    '(FFS,TRBO)' => %w(injection turbo).freeze,
    '(S-CHARGE)' => 'supercharger',
    '(SC*)'      => 'supercharger',
    '(DIESEL)'   => nil, # diesel
    '(DSL)'      => nil, # diesel
    '(ROTARY)'   => nil, # rotary
    '(VARIABLE)' => nil, # variable displacement
    '(NO-CAT)'   => nil, # no catalytic converter
    '(OHC)'      => nil, # overhead camshaft
    '(OHV)'      => nil, # overhead valves
    '(16-VALVE)' => nil, # 16V
    '(305)'      => nil, # 305 cubic inch displacement
    '(307)'      => nil, # 307 cubic inch displacement
    '(M-ENG)'    => nil,
    '(W-ENG)'    => nil,
    '(GM-BUICK)' => nil,
    '(GM-CHEV)'  => nil,
    '(GM-OLDS)'  => nil,
    '(GM-PONT)'  => nil
  }.freeze

  # Parser for the fixed-width guide files (used for model years 1985-1997).
  class ParserB
    attr_accessor :year

    # options[:year] - the model year stamped onto every row.
    def initialize(options = {})
      @year = options[:year]
    end

    # Adds the normalized keys to +row+ (mutating it) and returns it.
    # Raises NoMethodError if 'model_trans' is missing, as before.
    def apply(row)
      trans_code = row['model_trans']
      speeds = trans_code[1, 1]
      # Look the two engine descriptors up once; an entry may be a string,
      # an array of strings, or nil, hence the flatten.
      features = [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten
      row.merge!({
        'make'         => row['carline_mfr_name'], # make it line up with the errata
        'model'        => row['carline_name'],     # ditto
        'transmission' => TRANSMISSIONS[trans_code[0, 1]],
        'speeds'       => (speeds == 'V') ? 'variable' : speeds,
        'turbo'        => features.include?('turbo'),
        'supercharger' => features.include?('supercharger'),
        'injection'    => features.include?('injection'),
        'displacement' => _displacement(row['opt_disp']),
        'year'         => year
      })
      row
    end

    # Converts a displacement string such as "(2.5L)" or "1600CC" to litres.
    # Returns a Float, or nil when the string (or nil) has neither suffix.
    def _displacement(str)
      cleaned = str.to_s.delete('()').strip
      case cleaned
      when /\A(.+)L\z/  then $1.to_f
      when /\A(.+)CC\z/ then $1.to_f / 1000
      end
    end

    # Sets the import options for the fixed-width format on +bus+ and
    # registers the column layout with Slither.
    def add_hints!(bus)
      bus[:format] = :fixed_width
      bus[:cut] = '13-' if year == 1995
      bus[:schema_name] = :fuel_economy_guide_b
      # NOTE(review): this reads 'supress_code', but the schema below names the
      # column 'suppress_code', so this lookup never sees that column's value.
      # Left unchanged on purpose: the column is typed :integer and it is
      # unclear whether an empty field would still be blank? -- confirm first.
      bus[:select] = lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' }
      Slither.define :fuel_economy_guide_b do |d|
        d.rows do |row|
          row.trap { true } # there's only one section
          row.column 'active_year',      4,  :type => :integer # ACTIVE YEAR
          row.column 'state_code',       1,  :type => :string  # STATE CODE: F=49-STATE,C=CALIFORNIA
          row.column 'carline_clss',     2,  :type => :integer # CARLINE CLASS CODE
          row.column 'carline_mfr_code', 3,  :type => :integer # CARLINE MANUFACTURER CODE
          row.column 'carline_name',     28, :type => :string  # CARLINE NAME
          row.column 'disp_cub_in',      4,  :type => :integer # DISP CUBIC INCHES
          row.column 'fuel_system',      2,  :type => :string  # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
          row.column 'model_trans',      6,  :type => :string  # TRANSMISSION TYPE
          row.column 'no_cyc',           2,  :type => :integer # NUMBER OF ENGINE CYLINDERS
          row.column 'date_time',        12, :type => :string  # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
          row.column 'release_date',     6,  :type => :string  # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
          row.column 'vi_mfr_code',      3,  :type => :integer # VI MANUFACTURER CODE
          row.column 'carline_code',     5,  :type => :integer # CARLINE CODE
          row.column 'basic_eng_id',     5,  :type => :integer # BASIC ENGINE INDEX
          row.column 'carline_mfr_name', 32, :type => :string  # CARLINE MANUFACTURER NAME
          row.column 'suppress_code',    1,  :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
          row.column 'est_city_mpg',     3,  :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
          row.spacer 2
          row.column 'highway_mpg',      3,  :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
          row.spacer 2
          row.column 'combined_mpg',     3,  :type => :integer # COMBINED MILES PER GALLON
          row.spacer 2
          row.column 'unadj_city_mpg',   3,  :type => :integer # UNADJUSTED CITY MILES PER GALLON
          row.spacer 2
          row.column 'unadj_hwy_mpg',    3,  :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
          row.spacer 2
          row.column 'unadj_comb_mpg',   3,  :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
          row.spacer 2
          row.column 'ave_anl_fuel',     6,  :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
          row.column 'opt_disp',         8,  :type => :string  # OPTIONAL DISPLACEMENT
          row.column 'engine_desc1',     10, :type => :string  # ENGINE DESCRIPTION 1
          row.column 'engine_desc2',     10, :type => :string  # ENGINE DESCRIPTION 2
          row.column 'engine_desc3',     10, :type => :string  # ENGINE DESCRIPTION 3
          row.column 'body_type_2d',     10, :type => :string  # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
          row.column 'body_type_4d',     10, :type => :string  # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
          row.column 'body_type_hbk',    10, :type => :string  # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
          row.column 'puerto_rico',      1,  :type => :string  # '*' IF FOR PUERTO RICO SALES ONLY
          row.column 'overdrive',        4,  :type => :string  # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
          row.column 'drive_system',     3,  :type => :string  # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
          row.column 'filler',           1,  :type => :string  # NOT USED
          row.column 'fuel_type',        1,  :type => :string  # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
          row.column 'trans_desc',       15, :type => :string  # TRANSMISSION DESCRIPTORS
        end
      end
    end
  end

  # Parser for the guide files with 'Manufacturer' / 'carline name' / 'trans'
  # columns (used for model years 1998-2005).
  class ParserC
    attr_accessor :year

    # options[:year] - the model year stamped onto every row.
    def initialize(options = {})
      @year = options[:year]
    end

    # Intentionally adds nothing.
    def add_hints!(bus)
      # File will decide format based on filename
    end

    # Adds the normalized keys to +row+ (mutating it) and returns it.
    # The transmission letter and speed are read from the third- and
    # second-to-last characters of 'trans'.
    def apply(row)
      trans_code = row['trans']
      speeds = trans_code[-2, 1]
      row.merge!({
        'make'         => row['Manufacturer'], # make it line up with the errata
        'model'        => row['carline name'], # ditto
        'drive'        => row['drv'] + 'WD',
        'transmission' => TRANSMISSIONS[trans_code[-3, 1]],
        'speeds'       => (speeds == 'V') ? 'variable' : speeds,
        'turbo'        => row['T'] == 'T',
        'supercharger' => row['S'] == 'S',
        'injection'    => true,
        'year'         => year
      })
      row
    end
  end

  # Parser for the guide files with 'MFR' / 'CAR LINE' / 'TRANS' columns
  # (used for model years 2006 onwards).
  class ParserD
    attr_accessor :year

    # options[:year] - the model year stamped onto every row.
    def initialize(options = {})
      @year = options[:year]
    end

    # For 2007 only, adds a :reject lambda that is true for rows whose
    # first value is blank.
    def add_hints!(bus)
      bus[:reject] = lambda { |row| row.values.first.blank? } if year == 2007
    end

    # Adds the normalized keys to +row+ (mutating it) and returns it.
    def apply(row)
      trans_code = row['TRANS']
      speeds = trans_code[-2, 1]
      row.merge!({
        'make'         => row['MFR'],      # make it line up with the errata
        'model'        => row['CAR LINE'], # ditto
        'drive'        => row['DRIVE SYS'] + 'WD',
        'transmission' => TRANSMISSIONS[trans_code[-3, 1]],
        'speeds'       => (speeds == 'V') ? 'variable' : speeds,
        'turbo'        => row['TURBO'] == 'T',
        'supercharger' => row['SPCHGR'] == 'S',
        'injection'    => true,
        'year'         => year
      })
      row
    end
  end
end
179
+
180
# Responder handed to the errata: each public method takes a row hash and
# answers a yes/no question about it. The regex-based predicates return the
# result of =~ (a match index or nil), the others return true/false.
class Guru
  # the following matching methods are needed by the errata
  # per https://brighterplanet.sifterapp.com/projects/30/issues/750/comments

  def transmission_is_blank?(row)
    row['transmission'].blank?
  end

  # True only for year-2007 rows whose 'MFR' is GMC or Chevrolet
  # (case-insensitive). A missing 'MFR' counts as no match.
  def is_a_2007_gmc_or_chevrolet?(row)
    row['year'] == 2007 && %w(GMC CHEVROLET).include?(row['MFR'].to_s.upcase)
  end

  def is_a_porsche?(row)
    make_is?(row, 'PORSCHE')
  end

  def is_not_a_porsche?(row)
    !is_a_porsche?(row)
  end

  def is_a_mercedes_benz?(row)
    row['make'] =~ /MERCEDES/i
  end

  def is_a_lexus?(row)
    make_is?(row, 'LEXUS')
  end

  def is_a_bmw?(row)
    make_is?(row, 'BMW')
  end

  def is_a_ford?(row)
    make_is?(row, 'FORD')
  end

  def is_a_rolls_royce_and_model_contains_bentley?(row)
    is_a_rolls_royce?(row) && model_contains_bentley?(row)
  end

  def is_a_bentley?(row)
    make_is?(row, 'BENTLEY')
  end

  def is_a_rolls_royce?(row)
    row['make'] =~ /ROLLS/i
  end

  def is_a_turbo_brooklands?(row)
    row['model'] =~ /TURBO R\/RL BKLDS/i
  end

  def model_contains_maybach?(row)
    row['model'] =~ /MAYBACH/i
  end

  def model_contains_bentley?(row)
    row['model'] =~ /BENTLEY/i
  end

  private

  # Case-insensitive exact comparison of row['make'] with +upcased_name+.
  # to_s makes a row without a make answer false instead of raising
  # NoMethodError, matching how the regex predicates treat nil.
  def make_is?(row, upcased_name)
    row['make'].to_s.upcase == upcased_name
  end
end
240
+
241
# One errata object, answered by a Guru instance, is shared by every
# Fuel Economy Guide import below.
errata = Errata.new(
  :url       => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv',
  :responder => AutomobileVariant::Guru.new
)

data_miner do
  # 1985---1997
  (85..97).each do |yy|
    # The 1996 archive uses an .ASC member; every other year uses .DAT.
    filename = if yy == 96
      "#{yy}MFGUI.ASC"
    else
      "#{yy}MFGUI.DAT"
    end
    parser_b = { :class => FuelEconomyGuide::ParserB, :year => "19#{yy}".to_i }

    import(
      :url       => "http://www.fueleconomy.gov/FEG/epadata/#{yy}mfgui.zip",
      :filename  => filename,
      :transform => parser_b,
      :errata    => errata
    ) do
      key   'row_hash'
      store 'make_name',  :field_name => 'make'
      store 'model_name', :field_name => 'model'
      store 'year'
      store 'fuel_type_code', :field_name => 'fuel_type'
      store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
      store 'fuel_efficiency_city',    :static => nil, :units => :kilometres_per_litre # ditto
      store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg',  :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
      store 'raw_fuel_efficiency_city',    :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
      store 'cylinders', :field_name => 'no_cyc'
      store 'drive',     :field_name => 'drive_system'
      store 'carline_mfr_code'
      store 'vi_mfr_code'
      store 'carline_code'
      store 'carline_class_code', :field_name => 'carline_clss'
      store 'transmission'
      store 'speeds'
      store 'turbo'
      store 'supercharger'
      store 'injection'
      store 'displacement'
    end
  end

  # 1998--2005
  guides_1998_to_2005 = {
    1998 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv' },
    1999 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/99guide.zip', :filename => '99guide6.csv' },
    2000 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls' },
    2001 => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/01guide0918.csv' }, # parseexcel 0.5.2 can't read Excel 5.0 { :url => 'http://www.fueleconomy.gov/FEG/epadata/01data.zip', :filename => '01guide0918.xls' }
    2002 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls' },
    2003 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/03data.zip', :filename => 'guide_2003_feb04-03b.csv' },
    2004 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/04data.zip', :filename => 'gd04-Feb1804-RelDtFeb20.csv' },
    2005 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/05data.zip', :filename => 'guide2005-2004oct15.csv' }
  }
  # Walk the years in ascending order.
  guides_1998_to_2005.sort_by { |pair| pair.first }.each do |year, options|
    parser_c = { :class => FuelEconomyGuide::ParserC, :year => year }
    import(options.merge(:transform => parser_c, :errata => errata)) do
      key   'row_hash'
      store 'make_name',  :field_name => 'make'
      store 'model_name', :field_name => 'model'
      store 'fuel_type_code', :field_name => 'fl'
      store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
      store 'fuel_efficiency_city',    :static => nil, :units => :kilometres_per_litre # ditto
      store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
      store 'raw_fuel_efficiency_city',    :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
      store 'cylinders',    :field_name => 'cyl'
      store 'displacement', :field_name => 'displ'
      # The class code column is only stored from 2000 onwards.
      if year >= 2000
        store 'carline_class_code', :field_name => 'cls'
      end
      store 'carline_class_name', :field_name => 'Class'
      store 'year'
      store 'transmission'
      store 'speeds'
      store 'turbo'
      store 'supercharger'
      store 'injection'
      store 'drive'
    end
  end

  # 2006--2010
  guides_2006_onwards = {
    2006 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/06data.zip', :filename => '2006_FE_Guide_14-Nov-2005_download.csv' },
    2007 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/07data.zip', :filename => '2007_FE_guide_ALL_no_sales_May_01_2007.xls' },
    2008 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv' },
    2009 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/09data.zip', :filename => '2009_FE_guide for DOE_ALL-rel dates-no-sales-8-28-08download.csv' }
    # 2010 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/10data.zip', :filename => '2010FEguide-for DOE-rel dates before 10-16-09-no-sales10-8-09public.xls' }
  }
  guides_2006_onwards.sort_by { |pair| pair.first }.each do |year, options|
    parser_d = { :class => FuelEconomyGuide::ParserD, :year => year }
    import(options.merge(:transform => parser_d, :errata => errata)) do
      key   'row_hash'
      store 'make_name',  :field_name => 'make'
      store 'model_name', :field_name => 'model'
      store 'fuel_type_code', :field_name => 'FUEL TYPE'
      store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
      store 'fuel_efficiency_city',    :static => nil, :units => :kilometres_per_litre # ditto
      store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)',  :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
      store 'raw_fuel_efficiency_city',    :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
      store 'cylinders',    :field_name => 'NUMB CYL'
      store 'displacement', :field_name => 'DISPLACEMENT'
      store 'carline_class_code', :field_name => 'CLS'
      store 'carline_class_name', :field_name => 'CLASS'
      store 'year'
      store 'transmission'
      store 'speeds'
      store 'turbo'
      store 'supercharger'
      store 'injection'
      store 'drive'
    end
  end

  # associate :make, :key => :original_automobile_make_name, :foreign_key => :name
  # derive :automobile_model_id # creates models by name
  # associate :fuel_type, :key => :original_automobile_fuel_type_code, :foreign_key => :code

  # Fills the adjusted efficiency columns (left nil by the imports above)
  # from the raw ones, in SQL.
  process 'Set adjusted fuel economy' do
    update_all 'fuel_efficiency_city = 1 / ((0.003259 / 0.425143707) + (1.1805 / raw_fuel_efficiency_city))'
    update_all 'fuel_efficiency_highway = 1 / ((0.001376 / 0.425143707) + (1.3466 / raw_fuel_efficiency_highway))'
  end
end
352
+
353
# Space-separated description of the variant built from whichever of
# cylinders, displacement, turbo, injection, speeds, transmission and
# fuel type are set, e.g. "V6 3.0L turbo FI 5spd manual (...)".
def name
  [
    ("V#{cylinders}" if cylinders),
    ("#{displacement}L" if displacement),
    ("turbo" if turbo),
    ("FI" if injection),
    ("#{speeds}spd" if speeds.present?),
    (transmission if transmission.present?),
    ("(#{fuel_type.name})" if fuel_type)
  ].compact.join(' ') # skipped parts evaluate to nil and are dropped here
end
364
+
365
# "city/highway" fuel economy: each stored figure is taken as kilometres
# per litre, converted to miles per gallon and rounded.
def fuel_economy_description
  city_and_highway = [ fuel_efficiency_city, fuel_efficiency_highway ]
  in_mpg = city_and_highway.map do |efficiency|
    efficiency.kilometres_per_litre.to(:miles_per_gallon).round
  end
  in_mpg.join('/')
end
368
+ end
@@ -0,0 +1,15 @@
1
# Country model keyed by ISO 3166 code. Two sources are imported in turn
# and both write the same 'name' column.
class Country < ActiveRecord::Base
  set_primary_key :iso_3166

  data_miner do
    # Semicolon-delimited, header-less text file; :skip => 2 is passed for
    # its leading lines. Fields are addressed by position.
    import(
      'The official ISO country list',
      :url       => 'http://www.iso.org/iso/list-en1-semic-3.txt',
      :skip      => 2,
      :headers   => false,
      :delimiter => ';'
    ) do
      key   'iso_3166', :field_number => 1
      store 'name',     :field_number => 0
    end

    # CSV with headers; fields are addressed by column name.
    import(
      'A Princeton dataset with better capitalization',
      :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv'
    ) do
      key   'iso_3166', :field_name => 'country code'
      store 'name',     :field_name => 'country'
    end
  end
end
data/test/test_helper.rb CHANGED
@@ -6,6 +6,8 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
6
  $LOAD_PATH.unshift(File.dirname(__FILE__))
7
7
  require 'data_miner'
8
8
 
9
+ ENV['WIP'] = 'true' if ENV['ALL'] == 'true' # ENV values must be Strings; assigning the boolean true raises TypeError
10
+
9
11
  ActiveRecord::Base.establish_connection(
10
12
  'adapter' => 'mysql',
11
13
  'database' => 'data_miner_test',
@@ -13,11 +15,12 @@ ActiveRecord::Base.establish_connection(
13
15
  'password' => 'password'
14
16
  )
15
17
 
16
- ActiveSupport::Inflector.inflections do |inflect|
17
- inflect.uncountable %w{ aircraft aircraft_deux census_division_deux census_division_trois }
18
+ Dir.glob(File.expand_path('support/*.rb', File.dirname(__FILE__))).each do |lib|
19
+ require lib
18
20
  end
19
21
 
20
- class Test::Unit::TestCase
22
+ ActiveSupport::Inflector.inflections do |inflect|
23
+ inflect.uncountable %w{ aircraft aircraft_deux census_division_deux census_division_trois }
21
24
  end
22
25
 
23
26
  ActiveRecord::Schema.define(:version => 20090819143429) do
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 5
8
- - 5
9
- version: 0.5.5
8
+ - 6
9
+ version: 0.5.6
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere
@@ -16,7 +16,7 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2010-10-19 00:00:00 -04:00
19
+ date: 2010-11-01 00:00:00 -04:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency
@@ -270,6 +270,12 @@ files:
270
270
  - lib/data_miner/run.rb
271
271
  - lib/data_miner/schema.rb
272
272
  - lib/data_miner/tap.rb
273
+ - test/data_miner/attribute_test.rb
274
+ - test/support/airport.rb
275
+ - test/support/country.rb
276
+ - test/support/automobile_fuel_type.rb
277
+ - test/support/aircraft.rb
278
+ - test/support/automobile_variant.rb
273
279
  - test/data_miner_test.rb
274
280
  - test/test_helper.rb
275
281
  has_rdoc: true
@@ -307,5 +313,11 @@ signing_key:
307
313
  specification_version: 3
308
314
  summary: Mine remote data into your ActiveRecord models.
309
315
  test_files:
316
+ - test/data_miner/attribute_test.rb
317
+ - test/support/airport.rb
318
+ - test/support/country.rb
319
+ - test/support/automobile_fuel_type.rb
320
+ - test/support/aircraft.rb
321
+ - test/support/automobile_variant.rb
310
322
  - test/data_miner_test.rb
311
323
  - test/test_helper.rb