data_miner 0.5.5 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -84,6 +84,7 @@ module DataMiner
84
84
  return value if value.is_a? ActiveRecord::Base # carry through trapdoor
85
85
  value = value_in_dictionary value if wants_dictionary?
86
86
  value = synthesize.call(row) if wants_synthesize?
87
+ value = nil if value.blank? and wants_nullification?
87
88
  value
88
89
  end
89
90
 
@@ -150,7 +151,7 @@ module DataMiner
150
151
  options.has_key? :static
151
152
  end
152
153
  def wants_nullification?
153
- nullify != false
154
+ nullify == true
154
155
  end
155
156
  def wants_chars?
156
157
  chars.present?
@@ -0,0 +1,55 @@
1
+ require 'test_helper'
2
+
3
+ class AttributeTest < Test::Unit::TestCase
4
+ context '#value_from_row' do
5
+ setup do
6
+ @airport = Airport.new
7
+ end
8
+ context 'nullify is true' do
9
+ setup do
10
+ @attribute = DataMiner::Attribute.new @airport, 'latitude', :nullify => true
11
+ end
12
+ should 'return nil if field is blank' do
13
+ assert_nil @attribute.value_from_row(
14
+ 'name' => 'DTW',
15
+ 'city' => 'Warren',
16
+ 'country_name' => 'US',
17
+ 'latitude' => '',
18
+ 'longitude' => ''
19
+ )
20
+ end
21
+ should 'return the value if field is not blank' do
22
+ assert_equal '12.34', @attribute.value_from_row(
23
+ 'name' => 'DTW',
24
+ 'city' => 'Warren',
25
+ 'country_name' => 'US',
26
+ 'latitude' => '12.34',
27
+ 'longitude' => ''
28
+ )
29
+ end
30
+ end
31
+ context 'nullify is false' do
32
+ setup do
33
+ @attribute = DataMiner::Attribute.new @airport, 'latitude'
34
+ end
35
+ should 'return the value if field is not blank' do
36
+ assert_equal '12.34', @attribute.value_from_row(
37
+ 'name' => 'DTW',
38
+ 'city' => 'Warren',
39
+ 'country_name' => 'US',
40
+ 'latitude' => '12.34',
41
+ 'longitude' => ''
42
+ )
43
+ end
44
+ should 'return blank if field is blank' do
45
+ assert_equal '', @attribute.value_from_row(
46
+ 'name' => 'DTW',
47
+ 'city' => 'Warren',
48
+ 'country_name' => 'US',
49
+ 'latitude' => '',
50
+ 'longitude' => ''
51
+ )
52
+ end
53
+ end
54
+ end
55
+ end
@@ -1,446 +1,5 @@
1
1
  require 'test_helper'
2
2
 
3
- class AutomobileFuelType < ActiveRecord::Base
4
- set_primary_key :code
5
-
6
- data_miner do
7
- import(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
8
- :filename => 'Gd6-dsc.txt',
9
- :format => :fixed_width,
10
- :crop => 21..26, # inclusive
11
- :cut => '2-',
12
- :select => lambda { |row| /\A[A-Z]/.match row[:code] },
13
- :schema => [[ 'code', 2, { :type => :string } ],
14
- [ 'spacer', 2 ],
15
- [ 'name', 52, { :type => :string } ]]) do
16
- key 'code'
17
- store 'name'
18
- end
19
-
20
- import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/models_export/automobile_fuel_type.csv' do
21
- key 'code'
22
- store 'name'
23
- store 'annual_distance'
24
- store 'emission_factor'
25
- end
26
-
27
- # pull electricity emission factor from residential electricity
28
- import(:url => 'http://spreadsheets.google.com/pub?key=rukxnmuhhsOsrztTrUaFCXQ',
29
- :select => lambda { |row| row['code'] == 'El' }) do
30
- key 'code'
31
- store 'name'
32
- store 'emission_factor'
33
- end
34
-
35
- # still need distance estimate for electric cars
36
- end
37
-
38
- CODES = {
39
- :electricity => 'El',
40
- :diesel => 'D'
41
- }
42
- end
43
-
44
- class AutomobileVariant < ActiveRecord::Base
45
- set_primary_key :row_hash
46
-
47
- module FuelEconomyGuide
48
- TRANSMISSIONS = {
49
- 'A' => 'automatic',
50
- 'M' => 'manual',
51
- 'L' => 'automatic', # Lockup/automatic
52
- 'S' => 'semiautomatic', # Semiautomatic
53
- 'C' => 'manual' # TODO verify for VW Syncro
54
- }
55
-
56
- ENGINE_TYPES = {
57
- '(GUZZLER)' => nil, # "gas guzzler"
58
- '(POLICE)' => nil, # police automobile_variant
59
- '(MPFI)' => 'injection',
60
- '(MPI*)' => 'injection',
61
- '(SPFI)' => 'injection',
62
- '(FFS)' => 'injection',
63
- '(TURBO)' => 'turbo',
64
- '(TRBO)' => 'turbo',
65
- '(TC*)' => 'turbo',
66
- '(FFS,TRBO)' => %w(injection turbo),
67
- '(S-CHARGE)' => 'supercharger',
68
- '(SC*)' => 'supercharger',
69
- '(DIESEL)' => nil, # diesel
70
- '(DSL)' => nil, # diesel
71
- '(ROTARY)' => nil, # rotary
72
- '(VARIABLE)' => nil, # variable displacement
73
- '(NO-CAT)' => nil, # no catalytic converter
74
- '(OHC)' => nil, # overhead camshaft
75
- '(OHV)' => nil, # overhead valves
76
- '(16-VALVE)' => nil, # 16V
77
- '(305)' => nil, # 305 cubic inch displacement
78
- '(307)' => nil, # 307 cubic inch displacement
79
- '(M-ENG)' => nil,
80
- '(W-ENG)' => nil,
81
- '(GM-BUICK)' => nil,
82
- '(GM-CHEV)' => nil,
83
- '(GM-OLDS)' => nil,
84
- '(GM-PONT)' => nil,
85
- }
86
-
87
- class ParserB
88
- attr_accessor :year
89
- def initialize(options = {})
90
- @year = options[:year]
91
- end
92
-
93
- def apply(row)
94
- row.merge!({
95
- 'make' => row['carline_mfr_name'], # make it line up with the errata
96
- 'model' => row['carline_name'], # ditto
97
- 'transmission' => TRANSMISSIONS[row['model_trans'][0, 1]],
98
- 'speeds' => (row['model_trans'][1, 1] == 'V') ? 'variable' : row['model_trans'][1, 1],
99
- 'turbo' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('turbo'),
100
- 'supercharger' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('supercharger'),
101
- 'injection' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('injection'),
102
- 'displacement' => _displacement(row['opt_disp']),
103
- 'year' => year
104
- })
105
- row
106
- end
107
-
108
- def _displacement(str)
109
- str = str.gsub(/[\(\)]/, '').strip
110
- if str =~ /^(.+)L$/
111
- $1.to_f
112
- elsif str =~ /^(.+)CC$/
113
- $1.to_f / 1000
114
- end
115
- end
116
-
117
- def add_hints!(bus)
118
- bus[:format] = :fixed_width
119
- bus[:cut] = '13-' if year == 1995
120
- bus[:schema_name] = :fuel_economy_guide_b
121
- bus[:select] = lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' }
122
- Slither.define :fuel_economy_guide_b do |d|
123
- d.rows do |row|
124
- row.trap { true } # there's only one section
125
- row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR
126
- row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA
127
- row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE
128
- row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE
129
- row.column 'carline_name' , 28, :type => :string # CARLINE NAME
130
- row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES
131
- row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
132
- row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE
133
- row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS
134
- row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
135
- row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
136
- row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE
137
- row.column 'carline_code' , 5, :type => :integer # CARLINE CODE
138
- row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX
139
- row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME
140
- row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
141
- row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
142
- row.spacer 2
143
- row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
144
- row.spacer 2
145
- row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON
146
- row.spacer 2
147
- row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON
148
- row.spacer 2
149
- row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
150
- row.spacer 2
151
- row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
152
- row.spacer 2
153
- row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
154
- row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT
155
- row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1
156
- row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2
157
- row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3
158
- row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
159
- row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
160
- row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
161
- row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY
162
- row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
163
- row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
164
- row.column 'filler' , 1, :type => :string # NOT USED
165
- row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
166
- row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS
167
- end
168
- end
169
- end
170
- end
171
- class ParserC
172
- attr_accessor :year
173
- def initialize(options = {})
174
- @year = options[:year]
175
- end
176
-
177
- def add_hints!(bus)
178
- # File will decide format based on filename
179
- end
180
-
181
- def apply(row)
182
- row.merge!({
183
- 'make' => row['Manufacturer'], # make it line up with the errata
184
- 'model' => row['carline name'], # ditto
185
- 'drive' => row['drv'] + 'WD',
186
- 'transmission' => TRANSMISSIONS[row['trans'][-3, 1]],
187
- 'speeds' => (row['trans'][-2, 1] == 'V') ? 'variable' : row['trans'][-2, 1],
188
- 'turbo' => row['T'] == 'T',
189
- 'supercharger' => row['S'] == 'S',
190
- 'injection' => true,
191
- 'year' => year
192
- })
193
- row
194
- end
195
- end
196
- class ParserD
197
- attr_accessor :year
198
- def initialize(options = {})
199
- @year = options[:year]
200
- end
201
-
202
- def add_hints!(bus)
203
- bus[:reject] = lambda { |row| row.values.first.blank? } if year == 2007
204
- end
205
-
206
- def apply(row)
207
- row.merge!({
208
- 'make' => row['MFR'], # make it line up with the errata
209
- 'model' => row['CAR LINE'], # ditto
210
- 'drive' => row['DRIVE SYS'] + 'WD',
211
- 'transmission' => TRANSMISSIONS[row['TRANS'][-3, 1]],
212
- 'speeds' => (row['TRANS'][-2, 1] == 'V') ? 'variable' : row['TRANS'][-2, 1],
213
- 'turbo' => row['TURBO'] == 'T',
214
- 'supercharger' => row['SPCHGR'] == 'S',
215
- 'injection' => true,
216
- 'year' => year
217
- })
218
- row
219
- end
220
- end
221
- end
222
-
223
- class Guru
224
- # the following matching methods are needed by the errata
225
- # per https://brighterplanet.sifterapp.com/projects/30/issues/750/comments
226
-
227
- def transmission_is_blank?(row)
228
- row['transmission'].blank?
229
- end
230
-
231
- def is_a_2007_gmc_or_chevrolet?(row)
232
- row['year'] == 2007 and %w(GMC CHEVROLET).include? row['MFR'].upcase
233
- end
234
-
235
- def is_a_porsche?(row)
236
- row['make'].upcase == 'PORSCHE'
237
- end
238
-
239
- def is_not_a_porsche?(row)
240
- !is_a_porsche? row
241
- end
242
-
243
- def is_a_mercedes_benz?(row)
244
- row['make'] =~ /MERCEDES/i
245
- end
246
-
247
- def is_a_lexus?(row)
248
- row['make'].upcase == 'LEXUS'
249
- end
250
-
251
- def is_a_bmw?(row)
252
- row['make'].upcase == 'BMW'
253
- end
254
-
255
- def is_a_ford?(row)
256
- row['make'].upcase == 'FORD'
257
- end
258
-
259
- def is_a_rolls_royce_and_model_contains_bentley?(row)
260
- is_a_rolls_royce?(row) and model_contains_bentley?(row)
261
- end
262
-
263
- def is_a_bentley?(row)
264
- row['make'].upcase == 'BENTLEY'
265
- end
266
-
267
- def is_a_rolls_royce?(row)
268
- row['make'] =~ /ROLLS/i
269
- end
270
-
271
- def is_a_turbo_brooklands?(row)
272
- row['model'] =~ /TURBO R\/RL BKLDS/i
273
- end
274
-
275
- def model_contains_maybach?(row)
276
- row['model'] =~ /MAYBACH/i
277
- end
278
-
279
- def model_contains_bentley?(row)
280
- row['model'] =~ /BENTLEY/i
281
- end
282
- end
283
-
284
- errata = Errata.new :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv',
285
- :responder => AutomobileVariant::Guru.new
286
-
287
- data_miner do
288
- # 1985---1997
289
- (85..97).each do |yy|
290
- filename = (yy == 96) ? "#{yy}MFGUI.ASC" : "#{yy}MFGUI.DAT"
291
- import(:url => "http://www.fueleconomy.gov/FEG/epadata/#{yy}mfgui.zip",
292
- :filename => filename,
293
- :transform => { :class => FuelEconomyGuide::ParserB, :year => "19#{yy}".to_i },
294
- :errata => errata) do
295
- key 'row_hash'
296
- store 'make_name', :field_name => 'make'
297
- store 'model_name', :field_name => 'model'
298
- store 'year'
299
- store 'fuel_type_code', :field_name => 'fuel_type'
300
- store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
301
- store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
302
- store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
303
- store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
304
- store 'cylinders', :field_name => 'no_cyc'
305
- store 'drive', :field_name => 'drive_system'
306
- store 'carline_mfr_code'
307
- store 'vi_mfr_code'
308
- store 'carline_code'
309
- store 'carline_class_code', :field_name => 'carline_clss'
310
- store 'transmission'
311
- store 'speeds'
312
- store 'turbo'
313
- store 'supercharger'
314
- store 'injection'
315
- store 'displacement'
316
- end
317
- end
318
-
319
- # 1998--2005
320
- {
321
- 1998 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv' },
322
- 1999 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/99guide.zip', :filename => '99guide6.csv' },
323
- 2000 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls' },
324
- 2001 => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/01guide0918.csv' }, # parseexcel 0.5.2 can't read Excel 5.0 { :url => 'http://www.fueleconomy.gov/FEG/epadata/01data.zip', :filename => '01guide0918.xls' }
325
- 2002 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls' },
326
- 2003 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/03data.zip', :filename => 'guide_2003_feb04-03b.csv' },
327
- 2004 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/04data.zip', :filename => 'gd04-Feb1804-RelDtFeb20.csv' },
328
- 2005 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/05data.zip', :filename => 'guide2005-2004oct15.csv' }
329
- }.sort { |a, b| a.first <=> b.first }.each do |year, options|
330
- import options.merge(:transform => { :class => FuelEconomyGuide::ParserC, :year => year },
331
- :errata => errata) do
332
- key 'row_hash'
333
- store 'make_name', :field_name => 'make'
334
- store 'model_name', :field_name => 'model'
335
- store 'fuel_type_code', :field_name => 'fl'
336
- store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
337
- store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
338
- store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
339
- store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
340
- store 'cylinders', :field_name => 'cyl'
341
- store 'displacement', :field_name => 'displ'
342
- store 'carline_class_code', :field_name => 'cls' if year >= 2000
343
- store 'carline_class_name', :field_name => 'Class'
344
- store 'year'
345
- store 'transmission'
346
- store 'speeds'
347
- store 'turbo'
348
- store 'supercharger'
349
- store 'injection'
350
- store 'drive'
351
- end
352
- end
353
-
354
- # 2006--2010
355
- {
356
- 2006 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/06data.zip', :filename => '2006_FE_Guide_14-Nov-2005_download.csv' },
357
- 2007 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/07data.zip', :filename => '2007_FE_guide_ALL_no_sales_May_01_2007.xls' },
358
- 2008 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv' },
359
- 2009 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/09data.zip', :filename => '2009_FE_guide for DOE_ALL-rel dates-no-sales-8-28-08download.csv' },
360
- # 2010 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/10data.zip', :filename => '2010FEguide-for DOE-rel dates before 10-16-09-no-sales10-8-09public.xls' }
361
- }.sort { |a, b| a.first <=> b.first }.each do |year, options|
362
- import options.merge(:transform => { :class => FuelEconomyGuide::ParserD, :year => year },
363
- :errata => errata) do
364
- key 'row_hash'
365
- store 'make_name', :field_name => 'make'
366
- store 'model_name', :field_name => 'model'
367
- store 'fuel_type_code', :field_name => 'FUEL TYPE'
368
- store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
369
- store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
370
- store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
371
- store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
372
- store 'cylinders', :field_name => 'NUMB CYL'
373
- store 'displacement', :field_name => 'DISPLACEMENT'
374
- store 'carline_class_code', :field_name => 'CLS'
375
- store 'carline_class_name', :field_name => 'CLASS'
376
- store 'year'
377
- store 'transmission'
378
- store 'speeds'
379
- store 'turbo'
380
- store 'supercharger'
381
- store 'injection'
382
- store 'drive'
383
- end
384
- end
385
-
386
- # associate :make, :key => :original_automobile_make_name, :foreign_key => :name
387
- # derive :automobile_model_id # creates models by name
388
- # associate :fuel_type, :key => :original_automobile_fuel_type_code, :foreign_key => :code
389
-
390
- process 'Set adjusted fuel economy' do
391
- update_all 'fuel_efficiency_city = 1 / ((0.003259 / 0.425143707) + (1.1805 / raw_fuel_efficiency_city))'
392
- update_all 'fuel_efficiency_highway = 1 / ((0.001376 / 0.425143707) + (1.3466 / raw_fuel_efficiency_highway))'
393
- end
394
- end
395
-
396
- def name
397
- extra = []
398
- extra << "V#{cylinders}" if cylinders
399
- extra << "#{displacement}L" if displacement
400
- extra << "turbo" if turbo
401
- extra << "FI" if injection
402
- extra << "#{speeds}spd" if speeds.present?
403
- extra << transmission if transmission.present?
404
- extra << "(#{fuel_type.name})" if fuel_type
405
- extra.join(' ')
406
- end
407
-
408
- def fuel_economy_description
409
- [ fuel_efficiency_city, fuel_efficiency_highway ].map { |f| f.kilometres_per_litre.to(:miles_per_gallon).round }.join('/')
410
- end
411
- end
412
-
413
- class Country < ActiveRecord::Base
414
- set_primary_key :iso_3166
415
-
416
- data_miner do
417
- import 'The official ISO country list', :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do
418
- key 'iso_3166', :field_number => 1
419
- store 'name', :field_number => 0
420
- end
421
-
422
- import 'A Princeton dataset with better capitalization', :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do
423
- key 'iso_3166', :field_name => 'country code'
424
- store 'name', :field_name => 'country'
425
- end
426
- end
427
- end
428
-
429
- class Airport < ActiveRecord::Base
430
- set_primary_key :iata_code
431
-
432
- data_miner do
433
- import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do
434
- key 'iata_code', :field_number => 4
435
- store 'name', :field_number => 1
436
- store 'city', :field_number => 2
437
- store 'country_name', :field_number => 3
438
- store 'latitude', :field_number => 6
439
- store 'longitude', :field_number => 7
440
- end
441
- end
442
- end
443
-
444
3
  class TappedAirport < ActiveRecord::Base
445
4
  set_primary_key :iata_code
446
5
 
@@ -947,103 +506,6 @@ class T100FlightSegment < ActiveRecord::Base
947
506
  end
948
507
  end
949
508
 
950
- require 'loose_tight_dictionary'
951
- class Aircraft < ActiveRecord::Base
952
- set_primary_key :icao_code
953
-
954
- def self.bts_dictionary
955
- @_dictionary ||= LooseTightDictionary.new RemoteTable.new(:url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv', :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }),
956
- :tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
957
- :identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
958
- :blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
959
- :left_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Model'] },
960
- :right_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
961
- end
962
-
963
- class BtsAircraftTypeCodeMatcher
964
- def match(left_record)
965
- right_record = Aircraft.bts_dictionary.left_to_right left_record
966
- right_record['Aircraft Type'] if right_record
967
- end
968
- end
969
-
970
- class BtsNameMatcher
971
- def match(left_record)
972
- right_record = Aircraft.bts_dictionary.left_to_right left_record
973
- right_record['Manufacturer'] + ' ' + right_record['Long Name'] if right_record
974
- end
975
- end
976
-
977
- class Guru
978
- # for errata
979
- def is_attributed_to_boeing?(row)
980
- row['Manufacturer'] =~ /BOEING/i
981
- end
982
-
983
- def is_attributed_to_cessna?(row)
984
- row['Manufacturer'] =~ /CESSNA/i
985
- end
986
-
987
- def is_attributed_to_fokker?(row)
988
- row['Manufacturer'] =~ /FOKKER/i
989
- end
990
-
991
- def is_not_attributed_to_aerospatiale?(row)
992
- not row['Manufacturer'] =~ /AEROSPATIALE/i
993
- end
994
-
995
- def is_not_attributed_to_cessna?(row)
996
- not row['Manufacturer'] =~ /CESSNA/i
997
- end
998
-
999
- def is_not_attributed_to_learjet?(row)
1000
- not row['Manufacturer'] =~ /LEAR/i
1001
- end
1002
-
1003
- def is_not_attributed_to_dehavilland?(row)
1004
- not row['Manufacturer'] =~ /DE ?HAVILLAND/i
1005
- end
1006
-
1007
- def is_not_attributed_to_mcdonnell_douglas?(row)
1008
- not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
1009
- end
1010
-
1011
- def is_not_a_dc_plane?(row)
1012
- not row['Model'] =~ /DC/i
1013
- end
1014
-
1015
- def is_a_crj_900?(row)
1016
- row['Designator'].downcase == 'crj9'
1017
- end
1018
- end
1019
-
1020
- data_miner do
1021
- # ('A'..'Z').each do |letter|
1022
- # Note: for the purposes of testing, only importing "D"
1023
- %w{ D }.each do |letter|
1024
- import("ICAO codes starting with letter #{letter} used by the FAA",
1025
- :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
1026
- :encoding => 'US-ASCII',
1027
- :errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
1028
- :responder => Aircraft::Guru.new),
1029
- :row_xpath => '//table/tr[2]/td/table/tr',
1030
- :column_xpath => 'td') do
1031
- key 'icao_code', :field_name => 'Designator'
1032
- store 'bts_name', :matcher => Aircraft::BtsNameMatcher.new
1033
- store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher.new
1034
- store 'manufacturer_name', :field_name => 'Manufacturer'
1035
- store 'name', :field_name => 'Model'
1036
- end
1037
-
1038
- import 'Brighter Planet aircraft class codes',
1039
- :url => 'http://static.brighterplanet.com/science/data/transport/air/bts_aircraft_type/bts_aircraft_types-brighter_planet_aircraft_classes.csv' do
1040
- key 'bts_aircraft_type_code', :field_name => 'bts_aircraft_type'
1041
- store 'brighter_planet_aircraft_class_code'
1042
- end
1043
- end
1044
- end
1045
- end
1046
-
1047
509
  # note that this depends on stuff in Aircraft
1048
510
  class AircraftDeux < ActiveRecord::Base
1049
511
  set_primary_key :icao_code
@@ -1166,7 +628,19 @@ end
1166
628
 
1167
629
  # todo: have somebody properly organize these
1168
630
  class DataMinerTest < Test::Unit::TestCase
1169
- if ENV['ALL'] == 'true' or ENV['NEW'] == 'true'
631
+ if ENV['WIP']
632
+ context 'with nullify option' do
633
+ should 'treat blank fields as null values' do
634
+ Aircraft.delete_all
635
+ Aircraft.data_miner_runs.delete_all
636
+ Aircraft.run_data_miner!
637
+ assert_greater_than 0, Aircraft.count
638
+ assert_false Aircraft.where(:brighter_planet_aircraft_class_code => nil).empty?
639
+ end
640
+ end
641
+ end
642
+
643
+ if ENV['ALL'] == 'true'
1170
644
  should 'directly create a table for the model' do
1171
645
  if AutomobileMakeFleetYear.table_exists?
1172
646
  ActiveRecord::Base.connection.execute 'DROP TABLE automobile_make_fleet_years;'
@@ -0,0 +1,99 @@
1
+ require 'loose_tight_dictionary'
2
+
3
+ class Aircraft < ActiveRecord::Base
4
+ set_primary_key :icao_code
5
+ set_table_name 'aircraft'
6
+
7
+ def self.bts_dictionary
8
+ @_dictionary ||= LooseTightDictionary.new RemoteTable.new(:url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv', :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }),
9
+ :tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
10
+ :identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
11
+ :blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
12
+ :left_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Model'] },
13
+ :right_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
14
+ end
15
+
16
+ class BtsAircraftTypeCodeMatcher
17
+ def match(left_record)
18
+ right_record = Aircraft.bts_dictionary.left_to_right left_record
19
+ right_record['Aircraft Type'] if right_record
20
+ end
21
+ end
22
+
23
+ class BtsNameMatcher
24
+ def match(left_record)
25
+ right_record = Aircraft.bts_dictionary.left_to_right left_record
26
+ right_record['Manufacturer'] + ' ' + right_record['Long Name'] if right_record
27
+ end
28
+ end
29
+
30
+ class Guru
31
+ # for errata
32
+ def is_attributed_to_boeing?(row)
33
+ row['Manufacturer'] =~ /BOEING/i
34
+ end
35
+
36
+ def is_attributed_to_cessna?(row)
37
+ row['Manufacturer'] =~ /CESSNA/i
38
+ end
39
+
40
+ def is_attributed_to_fokker?(row)
41
+ row['Manufacturer'] =~ /FOKKER/i
42
+ end
43
+
44
+ def is_not_attributed_to_aerospatiale?(row)
45
+ not row['Manufacturer'] =~ /AEROSPATIALE/i
46
+ end
47
+
48
+ def is_not_attributed_to_cessna?(row)
49
+ not row['Manufacturer'] =~ /CESSNA/i
50
+ end
51
+
52
+ def is_not_attributed_to_learjet?(row)
53
+ not row['Manufacturer'] =~ /LEAR/i
54
+ end
55
+
56
+ def is_not_attributed_to_dehavilland?(row)
57
+ not row['Manufacturer'] =~ /DE ?HAVILLAND/i
58
+ end
59
+
60
+ def is_not_attributed_to_mcdonnell_douglas?(row)
61
+ not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
62
+ end
63
+
64
+ def is_not_a_dc_plane?(row)
65
+ not row['Model'] =~ /DC/i
66
+ end
67
+
68
+ def is_a_crj_900?(row)
69
+ row['Designator'].downcase == 'crj9'
70
+ end
71
+ end
72
+
73
+ data_miner do
74
+ # ('A'..'Z').each do |letter|
75
+ # Note: for the purposes of testing, only importing "D"
76
+ %w{ D }.each do |letter|
77
+ import("ICAO codes starting with letter #{letter} used by the FAA",
78
+ :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
79
+ :encoding => 'US-ASCII',
80
+ :errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
81
+ :responder => Aircraft::Guru.new),
82
+ :row_xpath => '//table/tr[2]/td/table/tr',
83
+ :column_xpath => 'td') do
84
+ key 'icao_code', :field_name => 'Designator'
85
+ store 'bts_name', :matcher => Aircraft::BtsNameMatcher.new, :nullify => true
86
+ store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher.new, :nullify => true
87
+ store 'manufacturer_name', :field_name => 'Manufacturer', :nullify => true
88
+ store 'name', :field_name => 'Model', :nullify => true
89
+ end
90
+
91
+ import 'Brighter Planet aircraft class codes',
92
+ :url => 'http://static.brighterplanet.com/science/data/transport/air/bts_aircraft_type/bts_aircraft_types-brighter_planet_aircraft_classes.csv' do
93
+ key 'bts_aircraft_type_code', :field_name => 'bts_aircraft_type'
94
+ store 'brighter_planet_aircraft_class_code', :nullify => true
95
+ end
96
+ end
97
+ end
98
+ end
99
+
@@ -0,0 +1,14 @@
1
+ class Airport < ActiveRecord::Base
2
+ set_primary_key :iata_code
3
+
4
+ data_miner do
5
+ import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do
6
+ key 'iata_code', :field_number => 4
7
+ store 'name', :field_number => 1
8
+ store 'city', :field_number => 2
9
+ store 'country_name', :field_number => 3
10
+ store 'latitude', :field_number => 6, :nullify => true
11
+ store 'longitude', :field_number => 7, :nullify => true
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,40 @@
1
+ class AutomobileFuelType < ActiveRecord::Base
2
+ set_primary_key :code
3
+
4
+ data_miner do
5
+ import(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
6
+ :filename => 'Gd6-dsc.txt',
7
+ :format => :fixed_width,
8
+ :crop => 21..26, # inclusive
9
+ :cut => '2-',
10
+ :select => lambda { |row| /\A[A-Z]/.match row[:code] },
11
+ :schema => [[ 'code', 2, { :type => :string } ],
12
+ [ 'spacer', 2 ],
13
+ [ 'name', 52, { :type => :string } ]]) do
14
+ key 'code'
15
+ store 'name'
16
+ end
17
+
18
+ import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/models_export/automobile_fuel_type.csv' do
19
+ key 'code'
20
+ store 'name'
21
+ store 'annual_distance'
22
+ store 'emission_factor'
23
+ end
24
+
25
+ # pull electricity emission factor from residential electricity
26
+ import(:url => 'http://spreadsheets.google.com/pub?key=rukxnmuhhsOsrztTrUaFCXQ',
27
+ :select => lambda { |row| row['code'] == 'El' }) do
28
+ key 'code'
29
+ store 'name'
30
+ store 'emission_factor'
31
+ end
32
+
33
+ # still need distance estimate for electric cars
34
+ end
35
+
36
+ CODES = {
37
+ :electricity => 'El',
38
+ :diesel => 'D'
39
+ }
40
+ end
@@ -0,0 +1,368 @@
1
+ class AutomobileVariant < ActiveRecord::Base
2
+ set_primary_key :row_hash
3
+
4
+ module FuelEconomyGuide
5
+ TRANSMISSIONS = {
6
+ 'A' => 'automatic',
7
+ 'M' => 'manual',
8
+ 'L' => 'automatic', # Lockup/automatic
9
+ 'S' => 'semiautomatic', # Semiautomatic
10
+ 'C' => 'manual' # TODO verify for VW Syncro
11
+ }
12
+
13
+ ENGINE_TYPES = {
14
+ '(GUZZLER)' => nil, # "gas guzzler"
15
+ '(POLICE)' => nil, # police automobile_variant
16
+ '(MPFI)' => 'injection',
17
+ '(MPI*)' => 'injection',
18
+ '(SPFI)' => 'injection',
19
+ '(FFS)' => 'injection',
20
+ '(TURBO)' => 'turbo',
21
+ '(TRBO)' => 'turbo',
22
+ '(TC*)' => 'turbo',
23
+ '(FFS,TRBO)' => %w(injection turbo),
24
+ '(S-CHARGE)' => 'supercharger',
25
+ '(SC*)' => 'supercharger',
26
+ '(DIESEL)' => nil, # diesel
27
+ '(DSL)' => nil, # diesel
28
+ '(ROTARY)' => nil, # rotary
29
+ '(VARIABLE)' => nil, # variable displacement
30
+ '(NO-CAT)' => nil, # no catalytic converter
31
+ '(OHC)' => nil, # overhead camshaft
32
+ '(OHV)' => nil, # overhead valves
33
+ '(16-VALVE)' => nil, # 16V
34
+ '(305)' => nil, # 305 cubic inch displacement
35
+ '(307)' => nil, # 307 cubic inch displacement
36
+ '(M-ENG)' => nil,
37
+ '(W-ENG)' => nil,
38
+ '(GM-BUICK)' => nil,
39
+ '(GM-CHEV)' => nil,
40
+ '(GM-OLDS)' => nil,
41
+ '(GM-PONT)' => nil,
42
+ }
43
+
44
+ class ParserB
45
+ attr_accessor :year
46
+ def initialize(options = {})
47
+ @year = options[:year]
48
+ end
49
+
50
+ def apply(row)
51
+ row.merge!({
52
+ 'make' => row['carline_mfr_name'], # make it line up with the errata
53
+ 'model' => row['carline_name'], # ditto
54
+ 'transmission' => TRANSMISSIONS[row['model_trans'][0, 1]],
55
+ 'speeds' => (row['model_trans'][1, 1] == 'V') ? 'variable' : row['model_trans'][1, 1],
56
+ 'turbo' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('turbo'),
57
+ 'supercharger' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('supercharger'),
58
+ 'injection' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('injection'),
59
+ 'displacement' => _displacement(row['opt_disp']),
60
+ 'year' => year
61
+ })
62
+ row
63
+ end
64
+
65
+ def _displacement(str)
66
+ str = str.gsub(/[\(\)]/, '').strip
67
+ if str =~ /^(.+)L$/
68
+ $1.to_f
69
+ elsif str =~ /^(.+)CC$/
70
+ $1.to_f / 1000
71
+ end
72
+ end
73
+
74
+ def add_hints!(bus)
75
+ bus[:format] = :fixed_width
76
+ bus[:cut] = '13-' if year == 1995
77
+ bus[:schema_name] = :fuel_economy_guide_b
78
+ bus[:select] = lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' }
79
+ Slither.define :fuel_economy_guide_b do |d|
80
+ d.rows do |row|
81
+ row.trap { true } # there's only one section
82
+ row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR
83
+ row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA
84
+ row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE
85
+ row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE
86
+ row.column 'carline_name' , 28, :type => :string # CARLINE NAME
87
+ row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES
88
+ row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
89
+ row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE
90
+ row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS
91
+ row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
92
+ row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
93
+ row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE
94
+ row.column 'carline_code' , 5, :type => :integer # CARLINE CODE
95
+ row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX
96
+ row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME
97
+ row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
98
+ row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
99
+ row.spacer 2
100
+ row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
101
+ row.spacer 2
102
+ row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON
103
+ row.spacer 2
104
+ row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON
105
+ row.spacer 2
106
+ row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
107
+ row.spacer 2
108
+ row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
109
+ row.spacer 2
110
+ row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
111
+ row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT
112
+ row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1
113
+ row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2
114
+ row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3
115
+ row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
116
+ row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
117
+ row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
118
+ row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY
119
+ row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
120
+ row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
121
+ row.column 'filler' , 1, :type => :string # NOT USED
122
+ row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
123
+ row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS
124
+ end
125
+ end
126
+ end
127
+ end
128
+ class ParserC
129
+ attr_accessor :year
130
+ def initialize(options = {})
131
+ @year = options[:year]
132
+ end
133
+
134
+ def add_hints!(bus)
135
+ # File will decide format based on filename
136
+ end
137
+
138
+ def apply(row)
139
+ row.merge!({
140
+ 'make' => row['Manufacturer'], # make it line up with the errata
141
+ 'model' => row['carline name'], # ditto
142
+ 'drive' => row['drv'] + 'WD',
143
+ 'transmission' => TRANSMISSIONS[row['trans'][-3, 1]],
144
+ 'speeds' => (row['trans'][-2, 1] == 'V') ? 'variable' : row['trans'][-2, 1],
145
+ 'turbo' => row['T'] == 'T',
146
+ 'supercharger' => row['S'] == 'S',
147
+ 'injection' => true,
148
+ 'year' => year
149
+ })
150
+ row
151
+ end
152
+ end
153
+ class ParserD
154
+ attr_accessor :year
155
+ def initialize(options = {})
156
+ @year = options[:year]
157
+ end
158
+
159
+ def add_hints!(bus)
160
+ bus[:reject] = lambda { |row| row.values.first.blank? } if year == 2007
161
+ end
162
+
163
+ def apply(row)
164
+ row.merge!({
165
+ 'make' => row['MFR'], # make it line up with the errata
166
+ 'model' => row['CAR LINE'], # ditto
167
+ 'drive' => row['DRIVE SYS'] + 'WD',
168
+ 'transmission' => TRANSMISSIONS[row['TRANS'][-3, 1]],
169
+ 'speeds' => (row['TRANS'][-2, 1] == 'V') ? 'variable' : row['TRANS'][-2, 1],
170
+ 'turbo' => row['TURBO'] == 'T',
171
+ 'supercharger' => row['SPCHGR'] == 'S',
172
+ 'injection' => true,
173
+ 'year' => year
174
+ })
175
+ row
176
+ end
177
+ end
178
+ end
179
+
180
+ class Guru
181
+ # the following matching methods are needed by the errata
182
+ # per https://brighterplanet.sifterapp.com/projects/30/issues/750/comments
183
+
184
+ def transmission_is_blank?(row)
185
+ row['transmission'].blank?
186
+ end
187
+
188
+ def is_a_2007_gmc_or_chevrolet?(row)
189
+ row['year'] == 2007 and %w(GMC CHEVROLET).include? row['MFR'].upcase
190
+ end
191
+
192
+ def is_a_porsche?(row)
193
+ row['make'].upcase == 'PORSCHE'
194
+ end
195
+
196
+ def is_not_a_porsche?(row)
197
+ !is_a_porsche? row
198
+ end
199
+
200
+ def is_a_mercedes_benz?(row)
201
+ row['make'] =~ /MERCEDES/i
202
+ end
203
+
204
+ def is_a_lexus?(row)
205
+ row['make'].upcase == 'LEXUS'
206
+ end
207
+
208
+ def is_a_bmw?(row)
209
+ row['make'].upcase == 'BMW'
210
+ end
211
+
212
+ def is_a_ford?(row)
213
+ row['make'].upcase == 'FORD'
214
+ end
215
+
216
+ def is_a_rolls_royce_and_model_contains_bentley?(row)
217
+ is_a_rolls_royce?(row) and model_contains_bentley?(row)
218
+ end
219
+
220
+ def is_a_bentley?(row)
221
+ row['make'].upcase == 'BENTLEY'
222
+ end
223
+
224
+ def is_a_rolls_royce?(row)
225
+ row['make'] =~ /ROLLS/i
226
+ end
227
+
228
+ def is_a_turbo_brooklands?(row)
229
+ row['model'] =~ /TURBO R\/RL BKLDS/i
230
+ end
231
+
232
+ def model_contains_maybach?(row)
233
+ row['model'] =~ /MAYBACH/i
234
+ end
235
+
236
+ def model_contains_bentley?(row)
237
+ row['model'] =~ /BENTLEY/i
238
+ end
239
+ end
240
+
241
+ errata = Errata.new :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv',
242
+ :responder => AutomobileVariant::Guru.new
243
+
244
+ data_miner do
245
+ # 1985---1997
246
+ (85..97).each do |yy|
247
+ filename = (yy == 96) ? "#{yy}MFGUI.ASC" : "#{yy}MFGUI.DAT"
248
+ import(:url => "http://www.fueleconomy.gov/FEG/epadata/#{yy}mfgui.zip",
249
+ :filename => filename,
250
+ :transform => { :class => FuelEconomyGuide::ParserB, :year => "19#{yy}".to_i },
251
+ :errata => errata) do
252
+ key 'row_hash'
253
+ store 'make_name', :field_name => 'make'
254
+ store 'model_name', :field_name => 'model'
255
+ store 'year'
256
+ store 'fuel_type_code', :field_name => 'fuel_type'
257
+ store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
258
+ store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
259
+ store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
260
+ store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
261
+ store 'cylinders', :field_name => 'no_cyc'
262
+ store 'drive', :field_name => 'drive_system'
263
+ store 'carline_mfr_code'
264
+ store 'vi_mfr_code'
265
+ store 'carline_code'
266
+ store 'carline_class_code', :field_name => 'carline_clss'
267
+ store 'transmission'
268
+ store 'speeds'
269
+ store 'turbo'
270
+ store 'supercharger'
271
+ store 'injection'
272
+ store 'displacement'
273
+ end
274
+ end
275
+
276
+ # 1998--2005
277
+ {
278
+ 1998 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv' },
279
+ 1999 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/99guide.zip', :filename => '99guide6.csv' },
280
+ 2000 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls' },
281
+ 2001 => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/01guide0918.csv' }, # parseexcel 0.5.2 can't read Excel 5.0 { :url => 'http://www.fueleconomy.gov/FEG/epadata/01data.zip', :filename => '01guide0918.xls' }
282
+ 2002 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls' },
283
+ 2003 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/03data.zip', :filename => 'guide_2003_feb04-03b.csv' },
284
+ 2004 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/04data.zip', :filename => 'gd04-Feb1804-RelDtFeb20.csv' },
285
+ 2005 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/05data.zip', :filename => 'guide2005-2004oct15.csv' }
286
+ }.sort { |a, b| a.first <=> b.first }.each do |year, options|
287
+ import options.merge(:transform => { :class => FuelEconomyGuide::ParserC, :year => year },
288
+ :errata => errata) do
289
+ key 'row_hash'
290
+ store 'make_name', :field_name => 'make'
291
+ store 'model_name', :field_name => 'model'
292
+ store 'fuel_type_code', :field_name => 'fl'
293
+ store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
294
+ store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
295
+ store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
296
+ store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
297
+ store 'cylinders', :field_name => 'cyl'
298
+ store 'displacement', :field_name => 'displ'
299
+ store 'carline_class_code', :field_name => 'cls' if year >= 2000
300
+ store 'carline_class_name', :field_name => 'Class'
301
+ store 'year'
302
+ store 'transmission'
303
+ store 'speeds'
304
+ store 'turbo'
305
+ store 'supercharger'
306
+ store 'injection'
307
+ store 'drive'
308
+ end
309
+ end
310
+
311
+ # 2006--2010
312
+ {
313
+ 2006 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/06data.zip', :filename => '2006_FE_Guide_14-Nov-2005_download.csv' },
314
+ 2007 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/07data.zip', :filename => '2007_FE_guide_ALL_no_sales_May_01_2007.xls' },
315
+ 2008 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv' },
316
+ 2009 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/09data.zip', :filename => '2009_FE_guide for DOE_ALL-rel dates-no-sales-8-28-08download.csv' },
317
+ # 2010 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/10data.zip', :filename => '2010FEguide-for DOE-rel dates before 10-16-09-no-sales10-8-09public.xls' }
318
+ }.sort { |a, b| a.first <=> b.first }.each do |year, options|
319
+ import options.merge(:transform => { :class => FuelEconomyGuide::ParserD, :year => year },
320
+ :errata => errata) do
321
+ key 'row_hash'
322
+ store 'make_name', :field_name => 'make'
323
+ store 'model_name', :field_name => 'model'
324
+ store 'fuel_type_code', :field_name => 'FUEL TYPE'
325
+ store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
326
+ store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
327
+ store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
328
+ store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
329
+ store 'cylinders', :field_name => 'NUMB CYL'
330
+ store 'displacement', :field_name => 'DISPLACEMENT'
331
+ store 'carline_class_code', :field_name => 'CLS'
332
+ store 'carline_class_name', :field_name => 'CLASS'
333
+ store 'year'
334
+ store 'transmission'
335
+ store 'speeds'
336
+ store 'turbo'
337
+ store 'supercharger'
338
+ store 'injection'
339
+ store 'drive'
340
+ end
341
+ end
342
+
343
+ # associate :make, :key => :original_automobile_make_name, :foreign_key => :name
344
+ # derive :automobile_model_id # creates models by name
345
+ # associate :fuel_type, :key => :original_automobile_fuel_type_code, :foreign_key => :code
346
+
347
+ process 'Set adjusted fuel economy' do
348
+ update_all 'fuel_efficiency_city = 1 / ((0.003259 / 0.425143707) + (1.1805 / raw_fuel_efficiency_city))'
349
+ update_all 'fuel_efficiency_highway = 1 / ((0.001376 / 0.425143707) + (1.3466 / raw_fuel_efficiency_highway))'
350
+ end
351
+ end
352
+
353
+ def name
354
+ extra = []
355
+ extra << "V#{cylinders}" if cylinders
356
+ extra << "#{displacement}L" if displacement
357
+ extra << "turbo" if turbo
358
+ extra << "FI" if injection
359
+ extra << "#{speeds}spd" if speeds.present?
360
+ extra << transmission if transmission.present?
361
+ extra << "(#{fuel_type.name})" if fuel_type
362
+ extra.join(' ')
363
+ end
364
+
365
+ def fuel_economy_description
366
+ [ fuel_efficiency_city, fuel_efficiency_highway ].map { |f| f.kilometres_per_litre.to(:miles_per_gallon).round }.join('/')
367
+ end
368
+ end
@@ -0,0 +1,15 @@
1
+ class Country < ActiveRecord::Base
2
+ set_primary_key :iso_3166
3
+
4
+ data_miner do
5
+ import 'The official ISO country list', :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do
6
+ key 'iso_3166', :field_number => 1
7
+ store 'name', :field_number => 0
8
+ end
9
+
10
+ import 'A Princeton dataset with better capitalization', :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do
11
+ key 'iso_3166', :field_name => 'country code'
12
+ store 'name', :field_name => 'country'
13
+ end
14
+ end
15
+ end
data/test/test_helper.rb CHANGED
@@ -6,6 +6,8 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
6
  $LOAD_PATH.unshift(File.dirname(__FILE__))
7
7
  require 'data_miner'
8
8
 
9
+ ENV['WIP'] = 'true' if ENV['ALL'] == 'true' # ENV values must be Strings; assigning the boolean true raises TypeError
10
+
9
11
  ActiveRecord::Base.establish_connection(
10
12
  'adapter' => 'mysql',
11
13
  'database' => 'data_miner_test',
@@ -13,11 +15,12 @@ ActiveRecord::Base.establish_connection(
13
15
  'password' => 'password'
14
16
  )
15
17
 
16
- ActiveSupport::Inflector.inflections do |inflect|
17
- inflect.uncountable %w{ aircraft aircraft_deux census_division_deux census_division_trois }
18
+ Dir.glob(File.expand_path('support/*.rb', File.dirname(__FILE__))).each do |lib|
19
+ require lib
18
20
  end
19
21
 
20
- class Test::Unit::TestCase
22
+ ActiveSupport::Inflector.inflections do |inflect|
23
+ inflect.uncountable %w{ aircraft aircraft_deux census_division_deux census_division_trois }
21
24
  end
22
25
 
23
26
  ActiveRecord::Schema.define(:version => 20090819143429) do
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 5
8
- - 5
9
- version: 0.5.5
8
+ - 6
9
+ version: 0.5.6
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere
@@ -16,7 +16,7 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2010-10-19 00:00:00 -04:00
19
+ date: 2010-11-01 00:00:00 -04:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency
@@ -270,6 +270,12 @@ files:
270
270
  - lib/data_miner/run.rb
271
271
  - lib/data_miner/schema.rb
272
272
  - lib/data_miner/tap.rb
273
+ - test/data_miner/attribute_test.rb
274
+ - test/support/airport.rb
275
+ - test/support/country.rb
276
+ - test/support/automobile_fuel_type.rb
277
+ - test/support/aircraft.rb
278
+ - test/support/automobile_variant.rb
273
279
  - test/data_miner_test.rb
274
280
  - test/test_helper.rb
275
281
  has_rdoc: true
@@ -307,5 +313,11 @@ signing_key:
307
313
  specification_version: 3
308
314
  summary: Mine remote data into your ActiveRecord models.
309
315
  test_files:
316
+ - test/data_miner/attribute_test.rb
317
+ - test/support/airport.rb
318
+ - test/support/country.rb
319
+ - test/support/automobile_fuel_type.rb
320
+ - test/support/aircraft.rb
321
+ - test/support/automobile_variant.rb
310
322
  - test/data_miner_test.rb
311
323
  - test/test_helper.rb