data_miner 0.5.5 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/data_miner/attribute.rb +2 -1
- data/test/data_miner/attribute_test.rb +55 -0
- data/test/data_miner_test.rb +13 -539
- data/test/support/aircraft.rb +99 -0
- data/test/support/airport.rb +14 -0
- data/test/support/automobile_fuel_type.rb +40 -0
- data/test/support/automobile_variant.rb +368 -0
- data/test/support/country.rb +15 -0
- data/test/test_helper.rb +6 -3
- metadata +15 -3
data/lib/data_miner/attribute.rb
CHANGED
@@ -84,6 +84,7 @@ module DataMiner
|
|
84
84
|
return value if value.is_a? ActiveRecord::Base # carry through trapdoor
|
85
85
|
value = value_in_dictionary value if wants_dictionary?
|
86
86
|
value = synthesize.call(row) if wants_synthesize?
|
87
|
+
value = nil if value.blank? and wants_nullification?
|
87
88
|
value
|
88
89
|
end
|
89
90
|
|
@@ -150,7 +151,7 @@ module DataMiner
|
|
150
151
|
options.has_key? :static
|
151
152
|
end
|
152
153
|
def wants_nullification?
|
153
|
-
nullify
|
154
|
+
nullify == true
|
154
155
|
end
|
155
156
|
def wants_chars?
|
156
157
|
chars.present?
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class AttributeTest < Test::Unit::TestCase
|
4
|
+
context '#value_from_row' do
|
5
|
+
setup do
|
6
|
+
@airport = Airport.new
|
7
|
+
end
|
8
|
+
context 'nullify is true' do
|
9
|
+
setup do
|
10
|
+
@attribute = DataMiner::Attribute.new @airport, 'latitude', :nullify => true
|
11
|
+
end
|
12
|
+
should 'return nil if field is blank' do
|
13
|
+
assert_nil @attribute.value_from_row(
|
14
|
+
'name' => 'DTW',
|
15
|
+
'city' => 'Warren',
|
16
|
+
'country_name' => 'US',
|
17
|
+
'latitude' => '',
|
18
|
+
'longitude' => ''
|
19
|
+
)
|
20
|
+
end
|
21
|
+
should 'return the value if field is not blank' do
|
22
|
+
assert_equal '12.34', @attribute.value_from_row(
|
23
|
+
'name' => 'DTW',
|
24
|
+
'city' => 'Warren',
|
25
|
+
'country_name' => 'US',
|
26
|
+
'latitude' => '12.34',
|
27
|
+
'longitude' => ''
|
28
|
+
)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
context 'nullify is false' do
|
32
|
+
setup do
|
33
|
+
@attribute = DataMiner::Attribute.new @airport, 'latitude'
|
34
|
+
end
|
35
|
+
should 'return the value if field is not blank' do
|
36
|
+
assert_equal '12.34', @attribute.value_from_row(
|
37
|
+
'name' => 'DTW',
|
38
|
+
'city' => 'Warren',
|
39
|
+
'country_name' => 'US',
|
40
|
+
'latitude' => '12.34',
|
41
|
+
'longitude' => ''
|
42
|
+
)
|
43
|
+
end
|
44
|
+
should 'return blank if field is blank' do
|
45
|
+
assert_equal '', @attribute.value_from_row(
|
46
|
+
'name' => 'DTW',
|
47
|
+
'city' => 'Warren',
|
48
|
+
'country_name' => 'US',
|
49
|
+
'latitude' => '',
|
50
|
+
'longitude' => ''
|
51
|
+
)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/test/data_miner_test.rb
CHANGED
@@ -1,446 +1,5 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
|
3
|
-
class AutomobileFuelType < ActiveRecord::Base
|
4
|
-
set_primary_key :code
|
5
|
-
|
6
|
-
data_miner do
|
7
|
-
import(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
|
8
|
-
:filename => 'Gd6-dsc.txt',
|
9
|
-
:format => :fixed_width,
|
10
|
-
:crop => 21..26, # inclusive
|
11
|
-
:cut => '2-',
|
12
|
-
:select => lambda { |row| /\A[A-Z]/.match row[:code] },
|
13
|
-
:schema => [[ 'code', 2, { :type => :string } ],
|
14
|
-
[ 'spacer', 2 ],
|
15
|
-
[ 'name', 52, { :type => :string } ]]) do
|
16
|
-
key 'code'
|
17
|
-
store 'name'
|
18
|
-
end
|
19
|
-
|
20
|
-
import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/models_export/automobile_fuel_type.csv' do
|
21
|
-
key 'code'
|
22
|
-
store 'name'
|
23
|
-
store 'annual_distance'
|
24
|
-
store 'emission_factor'
|
25
|
-
end
|
26
|
-
|
27
|
-
# pull electricity emission factor from residential electricity
|
28
|
-
import(:url => 'http://spreadsheets.google.com/pub?key=rukxnmuhhsOsrztTrUaFCXQ',
|
29
|
-
:select => lambda { |row| row['code'] == 'El' }) do
|
30
|
-
key 'code'
|
31
|
-
store 'name'
|
32
|
-
store 'emission_factor'
|
33
|
-
end
|
34
|
-
|
35
|
-
# still need distance estimate for electric cars
|
36
|
-
end
|
37
|
-
|
38
|
-
CODES = {
|
39
|
-
:electricity => 'El',
|
40
|
-
:diesel => 'D'
|
41
|
-
}
|
42
|
-
end
|
43
|
-
|
44
|
-
class AutomobileVariant < ActiveRecord::Base
|
45
|
-
set_primary_key :row_hash
|
46
|
-
|
47
|
-
module FuelEconomyGuide
|
48
|
-
TRANSMISSIONS = {
|
49
|
-
'A' => 'automatic',
|
50
|
-
'M' => 'manual',
|
51
|
-
'L' => 'automatic', # Lockup/automatic
|
52
|
-
'S' => 'semiautomatic', # Semiautomatic
|
53
|
-
'C' => 'manual' # TODO verify for VW Syncro
|
54
|
-
}
|
55
|
-
|
56
|
-
ENGINE_TYPES = {
|
57
|
-
'(GUZZLER)' => nil, # "gas guzzler"
|
58
|
-
'(POLICE)' => nil, # police automobile_variant
|
59
|
-
'(MPFI)' => 'injection',
|
60
|
-
'(MPI*)' => 'injection',
|
61
|
-
'(SPFI)' => 'injection',
|
62
|
-
'(FFS)' => 'injection',
|
63
|
-
'(TURBO)' => 'turbo',
|
64
|
-
'(TRBO)' => 'turbo',
|
65
|
-
'(TC*)' => 'turbo',
|
66
|
-
'(FFS,TRBO)' => %w(injection turbo),
|
67
|
-
'(S-CHARGE)' => 'supercharger',
|
68
|
-
'(SC*)' => 'supercharger',
|
69
|
-
'(DIESEL)' => nil, # diesel
|
70
|
-
'(DSL)' => nil, # diesel
|
71
|
-
'(ROTARY)' => nil, # rotary
|
72
|
-
'(VARIABLE)' => nil, # variable displacement
|
73
|
-
'(NO-CAT)' => nil, # no catalytic converter
|
74
|
-
'(OHC)' => nil, # overhead camshaft
|
75
|
-
'(OHV)' => nil, # overhead valves
|
76
|
-
'(16-VALVE)' => nil, # 16V
|
77
|
-
'(305)' => nil, # 305 cubic inch displacement
|
78
|
-
'(307)' => nil, # 307 cubic inch displacement
|
79
|
-
'(M-ENG)' => nil,
|
80
|
-
'(W-ENG)' => nil,
|
81
|
-
'(GM-BUICK)' => nil,
|
82
|
-
'(GM-CHEV)' => nil,
|
83
|
-
'(GM-OLDS)' => nil,
|
84
|
-
'(GM-PONT)' => nil,
|
85
|
-
}
|
86
|
-
|
87
|
-
class ParserB
|
88
|
-
attr_accessor :year
|
89
|
-
def initialize(options = {})
|
90
|
-
@year = options[:year]
|
91
|
-
end
|
92
|
-
|
93
|
-
def apply(row)
|
94
|
-
row.merge!({
|
95
|
-
'make' => row['carline_mfr_name'], # make it line up with the errata
|
96
|
-
'model' => row['carline_name'], # ditto
|
97
|
-
'transmission' => TRANSMISSIONS[row['model_trans'][0, 1]],
|
98
|
-
'speeds' => (row['model_trans'][1, 1] == 'V') ? 'variable' : row['model_trans'][1, 1],
|
99
|
-
'turbo' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('turbo'),
|
100
|
-
'supercharger' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('supercharger'),
|
101
|
-
'injection' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('injection'),
|
102
|
-
'displacement' => _displacement(row['opt_disp']),
|
103
|
-
'year' => year
|
104
|
-
})
|
105
|
-
row
|
106
|
-
end
|
107
|
-
|
108
|
-
def _displacement(str)
|
109
|
-
str = str.gsub(/[\(\)]/, '').strip
|
110
|
-
if str =~ /^(.+)L$/
|
111
|
-
$1.to_f
|
112
|
-
elsif str =~ /^(.+)CC$/
|
113
|
-
$1.to_f / 1000
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
def add_hints!(bus)
|
118
|
-
bus[:format] = :fixed_width
|
119
|
-
bus[:cut] = '13-' if year == 1995
|
120
|
-
bus[:schema_name] = :fuel_economy_guide_b
|
121
|
-
bus[:select] = lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' }
|
122
|
-
Slither.define :fuel_economy_guide_b do |d|
|
123
|
-
d.rows do |row|
|
124
|
-
row.trap { true } # there's only one section
|
125
|
-
row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR
|
126
|
-
row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA
|
127
|
-
row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE
|
128
|
-
row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE
|
129
|
-
row.column 'carline_name' , 28, :type => :string # CARLINE NAME
|
130
|
-
row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES
|
131
|
-
row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
|
132
|
-
row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE
|
133
|
-
row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS
|
134
|
-
row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
|
135
|
-
row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
|
136
|
-
row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE
|
137
|
-
row.column 'carline_code' , 5, :type => :integer # CARLINE CODE
|
138
|
-
row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX
|
139
|
-
row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME
|
140
|
-
row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
|
141
|
-
row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
|
142
|
-
row.spacer 2
|
143
|
-
row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
|
144
|
-
row.spacer 2
|
145
|
-
row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON
|
146
|
-
row.spacer 2
|
147
|
-
row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON
|
148
|
-
row.spacer 2
|
149
|
-
row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
|
150
|
-
row.spacer 2
|
151
|
-
row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
|
152
|
-
row.spacer 2
|
153
|
-
row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
|
154
|
-
row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT
|
155
|
-
row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1
|
156
|
-
row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2
|
157
|
-
row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3
|
158
|
-
row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
159
|
-
row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
160
|
-
row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
161
|
-
row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY
|
162
|
-
row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
|
163
|
-
row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
|
164
|
-
row.column 'filler' , 1, :type => :string # NOT USED
|
165
|
-
row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
|
166
|
-
row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS
|
167
|
-
end
|
168
|
-
end
|
169
|
-
end
|
170
|
-
end
|
171
|
-
class ParserC
|
172
|
-
attr_accessor :year
|
173
|
-
def initialize(options = {})
|
174
|
-
@year = options[:year]
|
175
|
-
end
|
176
|
-
|
177
|
-
def add_hints!(bus)
|
178
|
-
# File will decide format based on filename
|
179
|
-
end
|
180
|
-
|
181
|
-
def apply(row)
|
182
|
-
row.merge!({
|
183
|
-
'make' => row['Manufacturer'], # make it line up with the errata
|
184
|
-
'model' => row['carline name'], # ditto
|
185
|
-
'drive' => row['drv'] + 'WD',
|
186
|
-
'transmission' => TRANSMISSIONS[row['trans'][-3, 1]],
|
187
|
-
'speeds' => (row['trans'][-2, 1] == 'V') ? 'variable' : row['trans'][-2, 1],
|
188
|
-
'turbo' => row['T'] == 'T',
|
189
|
-
'supercharger' => row['S'] == 'S',
|
190
|
-
'injection' => true,
|
191
|
-
'year' => year
|
192
|
-
})
|
193
|
-
row
|
194
|
-
end
|
195
|
-
end
|
196
|
-
class ParserD
|
197
|
-
attr_accessor :year
|
198
|
-
def initialize(options = {})
|
199
|
-
@year = options[:year]
|
200
|
-
end
|
201
|
-
|
202
|
-
def add_hints!(bus)
|
203
|
-
bus[:reject] = lambda { |row| row.values.first.blank? } if year == 2007
|
204
|
-
end
|
205
|
-
|
206
|
-
def apply(row)
|
207
|
-
row.merge!({
|
208
|
-
'make' => row['MFR'], # make it line up with the errata
|
209
|
-
'model' => row['CAR LINE'], # ditto
|
210
|
-
'drive' => row['DRIVE SYS'] + 'WD',
|
211
|
-
'transmission' => TRANSMISSIONS[row['TRANS'][-3, 1]],
|
212
|
-
'speeds' => (row['TRANS'][-2, 1] == 'V') ? 'variable' : row['TRANS'][-2, 1],
|
213
|
-
'turbo' => row['TURBO'] == 'T',
|
214
|
-
'supercharger' => row['SPCHGR'] == 'S',
|
215
|
-
'injection' => true,
|
216
|
-
'year' => year
|
217
|
-
})
|
218
|
-
row
|
219
|
-
end
|
220
|
-
end
|
221
|
-
end
|
222
|
-
|
223
|
-
class Guru
|
224
|
-
# the following matching methods are needed by the errata
|
225
|
-
# per https://brighterplanet.sifterapp.com/projects/30/issues/750/comments
|
226
|
-
|
227
|
-
def transmission_is_blank?(row)
|
228
|
-
row['transmission'].blank?
|
229
|
-
end
|
230
|
-
|
231
|
-
def is_a_2007_gmc_or_chevrolet?(row)
|
232
|
-
row['year'] == 2007 and %w(GMC CHEVROLET).include? row['MFR'].upcase
|
233
|
-
end
|
234
|
-
|
235
|
-
def is_a_porsche?(row)
|
236
|
-
row['make'].upcase == 'PORSCHE'
|
237
|
-
end
|
238
|
-
|
239
|
-
def is_not_a_porsche?(row)
|
240
|
-
!is_a_porsche? row
|
241
|
-
end
|
242
|
-
|
243
|
-
def is_a_mercedes_benz?(row)
|
244
|
-
row['make'] =~ /MERCEDES/i
|
245
|
-
end
|
246
|
-
|
247
|
-
def is_a_lexus?(row)
|
248
|
-
row['make'].upcase == 'LEXUS'
|
249
|
-
end
|
250
|
-
|
251
|
-
def is_a_bmw?(row)
|
252
|
-
row['make'].upcase == 'BMW'
|
253
|
-
end
|
254
|
-
|
255
|
-
def is_a_ford?(row)
|
256
|
-
row['make'].upcase == 'FORD'
|
257
|
-
end
|
258
|
-
|
259
|
-
def is_a_rolls_royce_and_model_contains_bentley?(row)
|
260
|
-
is_a_rolls_royce?(row) and model_contains_bentley?(row)
|
261
|
-
end
|
262
|
-
|
263
|
-
def is_a_bentley?(row)
|
264
|
-
row['make'].upcase == 'BENTLEY'
|
265
|
-
end
|
266
|
-
|
267
|
-
def is_a_rolls_royce?(row)
|
268
|
-
row['make'] =~ /ROLLS/i
|
269
|
-
end
|
270
|
-
|
271
|
-
def is_a_turbo_brooklands?(row)
|
272
|
-
row['model'] =~ /TURBO R\/RL BKLDS/i
|
273
|
-
end
|
274
|
-
|
275
|
-
def model_contains_maybach?(row)
|
276
|
-
row['model'] =~ /MAYBACH/i
|
277
|
-
end
|
278
|
-
|
279
|
-
def model_contains_bentley?(row)
|
280
|
-
row['model'] =~ /BENTLEY/i
|
281
|
-
end
|
282
|
-
end
|
283
|
-
|
284
|
-
errata = Errata.new :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv',
|
285
|
-
:responder => AutomobileVariant::Guru.new
|
286
|
-
|
287
|
-
data_miner do
|
288
|
-
# 1985---1997
|
289
|
-
(85..97).each do |yy|
|
290
|
-
filename = (yy == 96) ? "#{yy}MFGUI.ASC" : "#{yy}MFGUI.DAT"
|
291
|
-
import(:url => "http://www.fueleconomy.gov/FEG/epadata/#{yy}mfgui.zip",
|
292
|
-
:filename => filename,
|
293
|
-
:transform => { :class => FuelEconomyGuide::ParserB, :year => "19#{yy}".to_i },
|
294
|
-
:errata => errata) do
|
295
|
-
key 'row_hash'
|
296
|
-
store 'make_name', :field_name => 'make'
|
297
|
-
store 'model_name', :field_name => 'model'
|
298
|
-
store 'year'
|
299
|
-
store 'fuel_type_code', :field_name => 'fuel_type'
|
300
|
-
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
301
|
-
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
302
|
-
store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
303
|
-
store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
304
|
-
store 'cylinders', :field_name => 'no_cyc'
|
305
|
-
store 'drive', :field_name => 'drive_system'
|
306
|
-
store 'carline_mfr_code'
|
307
|
-
store 'vi_mfr_code'
|
308
|
-
store 'carline_code'
|
309
|
-
store 'carline_class_code', :field_name => 'carline_clss'
|
310
|
-
store 'transmission'
|
311
|
-
store 'speeds'
|
312
|
-
store 'turbo'
|
313
|
-
store 'supercharger'
|
314
|
-
store 'injection'
|
315
|
-
store 'displacement'
|
316
|
-
end
|
317
|
-
end
|
318
|
-
|
319
|
-
# 1998--2005
|
320
|
-
{
|
321
|
-
1998 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv' },
|
322
|
-
1999 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/99guide.zip', :filename => '99guide6.csv' },
|
323
|
-
2000 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls' },
|
324
|
-
2001 => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/01guide0918.csv' }, # parseexcel 0.5.2 can't read Excel 5.0 { :url => 'http://www.fueleconomy.gov/FEG/epadata/01data.zip', :filename => '01guide0918.xls' }
|
325
|
-
2002 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls' },
|
326
|
-
2003 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/03data.zip', :filename => 'guide_2003_feb04-03b.csv' },
|
327
|
-
2004 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/04data.zip', :filename => 'gd04-Feb1804-RelDtFeb20.csv' },
|
328
|
-
2005 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/05data.zip', :filename => 'guide2005-2004oct15.csv' }
|
329
|
-
}.sort { |a, b| a.first <=> b.first }.each do |year, options|
|
330
|
-
import options.merge(:transform => { :class => FuelEconomyGuide::ParserC, :year => year },
|
331
|
-
:errata => errata) do
|
332
|
-
key 'row_hash'
|
333
|
-
store 'make_name', :field_name => 'make'
|
334
|
-
store 'model_name', :field_name => 'model'
|
335
|
-
store 'fuel_type_code', :field_name => 'fl'
|
336
|
-
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
337
|
-
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
338
|
-
store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
339
|
-
store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
340
|
-
store 'cylinders', :field_name => 'cyl'
|
341
|
-
store 'displacement', :field_name => 'displ'
|
342
|
-
store 'carline_class_code', :field_name => 'cls' if year >= 2000
|
343
|
-
store 'carline_class_name', :field_name => 'Class'
|
344
|
-
store 'year'
|
345
|
-
store 'transmission'
|
346
|
-
store 'speeds'
|
347
|
-
store 'turbo'
|
348
|
-
store 'supercharger'
|
349
|
-
store 'injection'
|
350
|
-
store 'drive'
|
351
|
-
end
|
352
|
-
end
|
353
|
-
|
354
|
-
# 2006--2010
|
355
|
-
{
|
356
|
-
2006 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/06data.zip', :filename => '2006_FE_Guide_14-Nov-2005_download.csv' },
|
357
|
-
2007 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/07data.zip', :filename => '2007_FE_guide_ALL_no_sales_May_01_2007.xls' },
|
358
|
-
2008 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv' },
|
359
|
-
2009 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/09data.zip', :filename => '2009_FE_guide for DOE_ALL-rel dates-no-sales-8-28-08download.csv' },
|
360
|
-
# 2010 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/10data.zip', :filename => '2010FEguide-for DOE-rel dates before 10-16-09-no-sales10-8-09public.xls' }
|
361
|
-
}.sort { |a, b| a.first <=> b.first }.each do |year, options|
|
362
|
-
import options.merge(:transform => { :class => FuelEconomyGuide::ParserD, :year => year },
|
363
|
-
:errata => errata) do
|
364
|
-
key 'row_hash'
|
365
|
-
store 'make_name', :field_name => 'make'
|
366
|
-
store 'model_name', :field_name => 'model'
|
367
|
-
store 'fuel_type_code', :field_name => 'FUEL TYPE'
|
368
|
-
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
369
|
-
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
370
|
-
store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
371
|
-
store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
372
|
-
store 'cylinders', :field_name => 'NUMB CYL'
|
373
|
-
store 'displacement', :field_name => 'DISPLACEMENT'
|
374
|
-
store 'carline_class_code', :field_name => 'CLS'
|
375
|
-
store 'carline_class_name', :field_name => 'CLASS'
|
376
|
-
store 'year'
|
377
|
-
store 'transmission'
|
378
|
-
store 'speeds'
|
379
|
-
store 'turbo'
|
380
|
-
store 'supercharger'
|
381
|
-
store 'injection'
|
382
|
-
store 'drive'
|
383
|
-
end
|
384
|
-
end
|
385
|
-
|
386
|
-
# associate :make, :key => :original_automobile_make_name, :foreign_key => :name
|
387
|
-
# derive :automobile_model_id # creates models by name
|
388
|
-
# associate :fuel_type, :key => :original_automobile_fuel_type_code, :foreign_key => :code
|
389
|
-
|
390
|
-
process 'Set adjusted fuel economy' do
|
391
|
-
update_all 'fuel_efficiency_city = 1 / ((0.003259 / 0.425143707) + (1.1805 / raw_fuel_efficiency_city))'
|
392
|
-
update_all 'fuel_efficiency_highway = 1 / ((0.001376 / 0.425143707) + (1.3466 / raw_fuel_efficiency_highway))'
|
393
|
-
end
|
394
|
-
end
|
395
|
-
|
396
|
-
def name
|
397
|
-
extra = []
|
398
|
-
extra << "V#{cylinders}" if cylinders
|
399
|
-
extra << "#{displacement}L" if displacement
|
400
|
-
extra << "turbo" if turbo
|
401
|
-
extra << "FI" if injection
|
402
|
-
extra << "#{speeds}spd" if speeds.present?
|
403
|
-
extra << transmission if transmission.present?
|
404
|
-
extra << "(#{fuel_type.name})" if fuel_type
|
405
|
-
extra.join(' ')
|
406
|
-
end
|
407
|
-
|
408
|
-
def fuel_economy_description
|
409
|
-
[ fuel_efficiency_city, fuel_efficiency_highway ].map { |f| f.kilometres_per_litre.to(:miles_per_gallon).round }.join('/')
|
410
|
-
end
|
411
|
-
end
|
412
|
-
|
413
|
-
class Country < ActiveRecord::Base
|
414
|
-
set_primary_key :iso_3166
|
415
|
-
|
416
|
-
data_miner do
|
417
|
-
import 'The official ISO country list', :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do
|
418
|
-
key 'iso_3166', :field_number => 1
|
419
|
-
store 'name', :field_number => 0
|
420
|
-
end
|
421
|
-
|
422
|
-
import 'A Princeton dataset with better capitalization', :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do
|
423
|
-
key 'iso_3166', :field_name => 'country code'
|
424
|
-
store 'name', :field_name => 'country'
|
425
|
-
end
|
426
|
-
end
|
427
|
-
end
|
428
|
-
|
429
|
-
class Airport < ActiveRecord::Base
|
430
|
-
set_primary_key :iata_code
|
431
|
-
|
432
|
-
data_miner do
|
433
|
-
import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do
|
434
|
-
key 'iata_code', :field_number => 4
|
435
|
-
store 'name', :field_number => 1
|
436
|
-
store 'city', :field_number => 2
|
437
|
-
store 'country_name', :field_number => 3
|
438
|
-
store 'latitude', :field_number => 6
|
439
|
-
store 'longitude', :field_number => 7
|
440
|
-
end
|
441
|
-
end
|
442
|
-
end
|
443
|
-
|
444
3
|
class TappedAirport < ActiveRecord::Base
|
445
4
|
set_primary_key :iata_code
|
446
5
|
|
@@ -947,103 +506,6 @@ class T100FlightSegment < ActiveRecord::Base
|
|
947
506
|
end
|
948
507
|
end
|
949
508
|
|
950
|
-
require 'loose_tight_dictionary'
|
951
|
-
class Aircraft < ActiveRecord::Base
|
952
|
-
set_primary_key :icao_code
|
953
|
-
|
954
|
-
def self.bts_dictionary
|
955
|
-
@_dictionary ||= LooseTightDictionary.new RemoteTable.new(:url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv', :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }),
|
956
|
-
:tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
|
957
|
-
:identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
|
958
|
-
:blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
|
959
|
-
:left_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Model'] },
|
960
|
-
:right_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
|
961
|
-
end
|
962
|
-
|
963
|
-
class BtsAircraftTypeCodeMatcher
|
964
|
-
def match(left_record)
|
965
|
-
right_record = Aircraft.bts_dictionary.left_to_right left_record
|
966
|
-
right_record['Aircraft Type'] if right_record
|
967
|
-
end
|
968
|
-
end
|
969
|
-
|
970
|
-
class BtsNameMatcher
|
971
|
-
def match(left_record)
|
972
|
-
right_record = Aircraft.bts_dictionary.left_to_right left_record
|
973
|
-
right_record['Manufacturer'] + ' ' + right_record['Long Name'] if right_record
|
974
|
-
end
|
975
|
-
end
|
976
|
-
|
977
|
-
class Guru
|
978
|
-
# for errata
|
979
|
-
def is_attributed_to_boeing?(row)
|
980
|
-
row['Manufacturer'] =~ /BOEING/i
|
981
|
-
end
|
982
|
-
|
983
|
-
def is_attributed_to_cessna?(row)
|
984
|
-
row['Manufacturer'] =~ /CESSNA/i
|
985
|
-
end
|
986
|
-
|
987
|
-
def is_attributed_to_fokker?(row)
|
988
|
-
row['Manufacturer'] =~ /FOKKER/i
|
989
|
-
end
|
990
|
-
|
991
|
-
def is_not_attributed_to_aerospatiale?(row)
|
992
|
-
not row['Manufacturer'] =~ /AEROSPATIALE/i
|
993
|
-
end
|
994
|
-
|
995
|
-
def is_not_attributed_to_cessna?(row)
|
996
|
-
not row['Manufacturer'] =~ /CESSNA/i
|
997
|
-
end
|
998
|
-
|
999
|
-
def is_not_attributed_to_learjet?(row)
|
1000
|
-
not row['Manufacturer'] =~ /LEAR/i
|
1001
|
-
end
|
1002
|
-
|
1003
|
-
def is_not_attributed_to_dehavilland?(row)
|
1004
|
-
not row['Manufacturer'] =~ /DE ?HAVILLAND/i
|
1005
|
-
end
|
1006
|
-
|
1007
|
-
def is_not_attributed_to_mcdonnell_douglas?(row)
|
1008
|
-
not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
|
1009
|
-
end
|
1010
|
-
|
1011
|
-
def is_not_a_dc_plane?(row)
|
1012
|
-
not row['Model'] =~ /DC/i
|
1013
|
-
end
|
1014
|
-
|
1015
|
-
def is_a_crj_900?(row)
|
1016
|
-
row['Designator'].downcase == 'crj9'
|
1017
|
-
end
|
1018
|
-
end
|
1019
|
-
|
1020
|
-
data_miner do
|
1021
|
-
# ('A'..'Z').each do |letter|
|
1022
|
-
# Note: for the purposes of testing, only importing "D"
|
1023
|
-
%w{ D }.each do |letter|
|
1024
|
-
import("ICAO codes starting with letter #{letter} used by the FAA",
|
1025
|
-
:url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
|
1026
|
-
:encoding => 'US-ASCII',
|
1027
|
-
:errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
|
1028
|
-
:responder => Aircraft::Guru.new),
|
1029
|
-
:row_xpath => '//table/tr[2]/td/table/tr',
|
1030
|
-
:column_xpath => 'td') do
|
1031
|
-
key 'icao_code', :field_name => 'Designator'
|
1032
|
-
store 'bts_name', :matcher => Aircraft::BtsNameMatcher.new
|
1033
|
-
store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher.new
|
1034
|
-
store 'manufacturer_name', :field_name => 'Manufacturer'
|
1035
|
-
store 'name', :field_name => 'Model'
|
1036
|
-
end
|
1037
|
-
|
1038
|
-
import 'Brighter Planet aircraft class codes',
|
1039
|
-
:url => 'http://static.brighterplanet.com/science/data/transport/air/bts_aircraft_type/bts_aircraft_types-brighter_planet_aircraft_classes.csv' do
|
1040
|
-
key 'bts_aircraft_type_code', :field_name => 'bts_aircraft_type'
|
1041
|
-
store 'brighter_planet_aircraft_class_code'
|
1042
|
-
end
|
1043
|
-
end
|
1044
|
-
end
|
1045
|
-
end
|
1046
|
-
|
1047
509
|
# note that this depends on stuff in Aircraft
|
1048
510
|
class AircraftDeux < ActiveRecord::Base
|
1049
511
|
set_primary_key :icao_code
|
@@ -1166,7 +628,19 @@ end
|
|
1166
628
|
|
1167
629
|
# todo: have somebody properly organize these
|
1168
630
|
class DataMinerTest < Test::Unit::TestCase
|
1169
|
-
if ENV['
|
631
|
+
if ENV['WIP']
|
632
|
+
context 'with nullify option' do
|
633
|
+
should 'treat blank fields as null values' do
|
634
|
+
Aircraft.delete_all
|
635
|
+
Aircraft.data_miner_runs.delete_all
|
636
|
+
Aircraft.run_data_miner!
|
637
|
+
assert_greater_than 0, Aircraft.count
|
638
|
+
assert_false Aircraft.where(:brighter_planet_aircraft_class_code => nil).empty?
|
639
|
+
end
|
640
|
+
end
|
641
|
+
end
|
642
|
+
|
643
|
+
if ENV['ALL'] == 'true'
|
1170
644
|
should 'directly create a table for the model' do
|
1171
645
|
if AutomobileMakeFleetYear.table_exists?
|
1172
646
|
ActiveRecord::Base.connection.execute 'DROP TABLE automobile_make_fleet_years;'
|
@@ -0,0 +1,99 @@
|
|
1
|
+
require 'loose_tight_dictionary'
|
2
|
+
|
3
|
+
class Aircraft < ActiveRecord::Base
|
4
|
+
set_primary_key :icao_code
|
5
|
+
set_table_name 'aircraft'
|
6
|
+
|
7
|
+
def self.bts_dictionary
|
8
|
+
@_dictionary ||= LooseTightDictionary.new RemoteTable.new(:url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv', :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }),
|
9
|
+
:tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
|
10
|
+
:identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
|
11
|
+
:blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
|
12
|
+
:left_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Model'] },
|
13
|
+
:right_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
|
14
|
+
end
|
15
|
+
|
16
|
+
class BtsAircraftTypeCodeMatcher
|
17
|
+
def match(left_record)
|
18
|
+
right_record = Aircraft.bts_dictionary.left_to_right left_record
|
19
|
+
right_record['Aircraft Type'] if right_record
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
class BtsNameMatcher
|
24
|
+
def match(left_record)
|
25
|
+
right_record = Aircraft.bts_dictionary.left_to_right left_record
|
26
|
+
right_record['Manufacturer'] + ' ' + right_record['Long Name'] if right_record
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
class Guru
|
31
|
+
# for errata
|
32
|
+
def is_attributed_to_boeing?(row)
|
33
|
+
row['Manufacturer'] =~ /BOEING/i
|
34
|
+
end
|
35
|
+
|
36
|
+
def is_attributed_to_cessna?(row)
|
37
|
+
row['Manufacturer'] =~ /CESSNA/i
|
38
|
+
end
|
39
|
+
|
40
|
+
def is_attributed_to_fokker?(row)
|
41
|
+
row['Manufacturer'] =~ /FOKKER/i
|
42
|
+
end
|
43
|
+
|
44
|
+
def is_not_attributed_to_aerospatiale?(row)
|
45
|
+
not row['Manufacturer'] =~ /AEROSPATIALE/i
|
46
|
+
end
|
47
|
+
|
48
|
+
def is_not_attributed_to_cessna?(row)
|
49
|
+
not row['Manufacturer'] =~ /CESSNA/i
|
50
|
+
end
|
51
|
+
|
52
|
+
def is_not_attributed_to_learjet?(row)
|
53
|
+
not row['Manufacturer'] =~ /LEAR/i
|
54
|
+
end
|
55
|
+
|
56
|
+
def is_not_attributed_to_dehavilland?(row)
|
57
|
+
not row['Manufacturer'] =~ /DE ?HAVILLAND/i
|
58
|
+
end
|
59
|
+
|
60
|
+
def is_not_attributed_to_mcdonnell_douglas?(row)
|
61
|
+
not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
|
62
|
+
end
|
63
|
+
|
64
|
+
def is_not_a_dc_plane?(row)
|
65
|
+
not row['Model'] =~ /DC/i
|
66
|
+
end
|
67
|
+
|
68
|
+
def is_a_crj_900?(row)
|
69
|
+
row['Designator'].downcase == 'crj9'
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
data_miner do
|
74
|
+
# ('A'..'Z').each do |letter|
|
75
|
+
# Note: for the purposes of testing, only importing "D"
|
76
|
+
%w{ D }.each do |letter|
|
77
|
+
import("ICAO codes starting with letter #{letter} used by the FAA",
|
78
|
+
:url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
|
79
|
+
:encoding => 'US-ASCII',
|
80
|
+
:errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
|
81
|
+
:responder => Aircraft::Guru.new),
|
82
|
+
:row_xpath => '//table/tr[2]/td/table/tr',
|
83
|
+
:column_xpath => 'td') do
|
84
|
+
key 'icao_code', :field_name => 'Designator'
|
85
|
+
store 'bts_name', :matcher => Aircraft::BtsNameMatcher.new, :nullify => true
|
86
|
+
store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher.new, :nullify => true
|
87
|
+
store 'manufacturer_name', :field_name => 'Manufacturer', :nullify => true
|
88
|
+
store 'name', :field_name => 'Model', :nullify => true
|
89
|
+
end
|
90
|
+
|
91
|
+
import 'Brighter Planet aircraft class codes',
|
92
|
+
:url => 'http://static.brighterplanet.com/science/data/transport/air/bts_aircraft_type/bts_aircraft_types-brighter_planet_aircraft_classes.csv' do
|
93
|
+
key 'bts_aircraft_type_code', :field_name => 'bts_aircraft_type'
|
94
|
+
store 'brighter_planet_aircraft_class_code', :nullify => true
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
@@ -0,0 +1,14 @@
|
|
1
|
+
class Airport < ActiveRecord::Base
|
2
|
+
set_primary_key :iata_code
|
3
|
+
|
4
|
+
data_miner do
|
5
|
+
import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do
|
6
|
+
key 'iata_code', :field_number => 4
|
7
|
+
store 'name', :field_number => 1
|
8
|
+
store 'city', :field_number => 2
|
9
|
+
store 'country_name', :field_number => 3
|
10
|
+
store 'latitude', :field_number => 6, :nullify => true
|
11
|
+
store 'longitude', :field_number => 7, :nullify => true
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
class AutomobileFuelType < ActiveRecord::Base
|
2
|
+
set_primary_key :code
|
3
|
+
|
4
|
+
data_miner do
|
5
|
+
import(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
|
6
|
+
:filename => 'Gd6-dsc.txt',
|
7
|
+
:format => :fixed_width,
|
8
|
+
:crop => 21..26, # inclusive
|
9
|
+
:cut => '2-',
|
10
|
+
:select => lambda { |row| /\A[A-Z]/.match row[:code] },
|
11
|
+
:schema => [[ 'code', 2, { :type => :string } ],
|
12
|
+
[ 'spacer', 2 ],
|
13
|
+
[ 'name', 52, { :type => :string } ]]) do
|
14
|
+
key 'code'
|
15
|
+
store 'name'
|
16
|
+
end
|
17
|
+
|
18
|
+
import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/models_export/automobile_fuel_type.csv' do
|
19
|
+
key 'code'
|
20
|
+
store 'name'
|
21
|
+
store 'annual_distance'
|
22
|
+
store 'emission_factor'
|
23
|
+
end
|
24
|
+
|
25
|
+
# pull electricity emission factor from residential electricity
|
26
|
+
import(:url => 'http://spreadsheets.google.com/pub?key=rukxnmuhhsOsrztTrUaFCXQ',
|
27
|
+
:select => lambda { |row| row['code'] == 'El' }) do
|
28
|
+
key 'code'
|
29
|
+
store 'name'
|
30
|
+
store 'emission_factor'
|
31
|
+
end
|
32
|
+
|
33
|
+
# still need distance estimate for electric cars
|
34
|
+
end
|
35
|
+
|
36
|
+
CODES = {
|
37
|
+
:electricity => 'El',
|
38
|
+
:diesel => 'D'
|
39
|
+
}
|
40
|
+
end
|
@@ -0,0 +1,368 @@
|
|
1
|
+
class AutomobileVariant < ActiveRecord::Base
|
2
|
+
set_primary_key :row_hash
|
3
|
+
|
4
|
+
module FuelEconomyGuide
|
5
|
+
TRANSMISSIONS = {
|
6
|
+
'A' => 'automatic',
|
7
|
+
'M' => 'manual',
|
8
|
+
'L' => 'automatic', # Lockup/automatic
|
9
|
+
'S' => 'semiautomatic', # Semiautomatic
|
10
|
+
'C' => 'manual' # TODO verify for VW Syncro
|
11
|
+
}
|
12
|
+
|
13
|
+
ENGINE_TYPES = {
|
14
|
+
'(GUZZLER)' => nil, # "gas guzzler"
|
15
|
+
'(POLICE)' => nil, # police automobile_variant
|
16
|
+
'(MPFI)' => 'injection',
|
17
|
+
'(MPI*)' => 'injection',
|
18
|
+
'(SPFI)' => 'injection',
|
19
|
+
'(FFS)' => 'injection',
|
20
|
+
'(TURBO)' => 'turbo',
|
21
|
+
'(TRBO)' => 'turbo',
|
22
|
+
'(TC*)' => 'turbo',
|
23
|
+
'(FFS,TRBO)' => %w(injection turbo),
|
24
|
+
'(S-CHARGE)' => 'supercharger',
|
25
|
+
'(SC*)' => 'supercharger',
|
26
|
+
'(DIESEL)' => nil, # diesel
|
27
|
+
'(DSL)' => nil, # diesel
|
28
|
+
'(ROTARY)' => nil, # rotary
|
29
|
+
'(VARIABLE)' => nil, # variable displacement
|
30
|
+
'(NO-CAT)' => nil, # no catalytic converter
|
31
|
+
'(OHC)' => nil, # overhead camshaft
|
32
|
+
'(OHV)' => nil, # overhead valves
|
33
|
+
'(16-VALVE)' => nil, # 16V
|
34
|
+
'(305)' => nil, # 305 cubic inch displacement
|
35
|
+
'(307)' => nil, # 307 cubic inch displacement
|
36
|
+
'(M-ENG)' => nil,
|
37
|
+
'(W-ENG)' => nil,
|
38
|
+
'(GM-BUICK)' => nil,
|
39
|
+
'(GM-CHEV)' => nil,
|
40
|
+
'(GM-OLDS)' => nil,
|
41
|
+
'(GM-PONT)' => nil,
|
42
|
+
}
|
43
|
+
|
44
|
+
class ParserB
|
45
|
+
attr_accessor :year
|
46
|
+
def initialize(options = {})
|
47
|
+
@year = options[:year]
|
48
|
+
end
|
49
|
+
|
50
|
+
def apply(row)
|
51
|
+
row.merge!({
|
52
|
+
'make' => row['carline_mfr_name'], # make it line up with the errata
|
53
|
+
'model' => row['carline_name'], # ditto
|
54
|
+
'transmission' => TRANSMISSIONS[row['model_trans'][0, 1]],
|
55
|
+
'speeds' => (row['model_trans'][1, 1] == 'V') ? 'variable' : row['model_trans'][1, 1],
|
56
|
+
'turbo' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('turbo'),
|
57
|
+
'supercharger' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('supercharger'),
|
58
|
+
'injection' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('injection'),
|
59
|
+
'displacement' => _displacement(row['opt_disp']),
|
60
|
+
'year' => year
|
61
|
+
})
|
62
|
+
row
|
63
|
+
end
|
64
|
+
|
65
|
+
def _displacement(str)
|
66
|
+
str = str.gsub(/[\(\)]/, '').strip
|
67
|
+
if str =~ /^(.+)L$/
|
68
|
+
$1.to_f
|
69
|
+
elsif str =~ /^(.+)CC$/
|
70
|
+
$1.to_f / 1000
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def add_hints!(bus)
|
75
|
+
bus[:format] = :fixed_width
|
76
|
+
bus[:cut] = '13-' if year == 1995
|
77
|
+
bus[:schema_name] = :fuel_economy_guide_b
|
78
|
+
bus[:select] = lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' } # NOTE(review): key 'supress_code' does not match the 'suppress_code' column declared in the schema of this method, so this test presumably always sees nil — confirm which spelling is intended before changing it
|
79
|
+
Slither.define :fuel_economy_guide_b do |d|
|
80
|
+
d.rows do |row|
|
81
|
+
row.trap { true } # there's only one section
|
82
|
+
row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR
|
83
|
+
row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA
|
84
|
+
row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE
|
85
|
+
row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE
|
86
|
+
row.column 'carline_name' , 28, :type => :string # CARLINE NAME
|
87
|
+
row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES
|
88
|
+
row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
|
89
|
+
row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE
|
90
|
+
row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS
|
91
|
+
row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
|
92
|
+
row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
|
93
|
+
row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE
|
94
|
+
row.column 'carline_code' , 5, :type => :integer # CARLINE CODE
|
95
|
+
row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX
|
96
|
+
row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME
|
97
|
+
row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
|
98
|
+
row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
|
99
|
+
row.spacer 2
|
100
|
+
row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
|
101
|
+
row.spacer 2
|
102
|
+
row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON
|
103
|
+
row.spacer 2
|
104
|
+
row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON
|
105
|
+
row.spacer 2
|
106
|
+
row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
|
107
|
+
row.spacer 2
|
108
|
+
row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
|
109
|
+
row.spacer 2
|
110
|
+
row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
|
111
|
+
row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT
|
112
|
+
row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1
|
113
|
+
row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2
|
114
|
+
row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3
|
115
|
+
row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
116
|
+
row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
117
|
+
row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
118
|
+
row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY
|
119
|
+
row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
|
120
|
+
row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
|
121
|
+
row.column 'filler' , 1, :type => :string # NOT USED
|
122
|
+
row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
|
123
|
+
row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
class ParserC
|
129
|
+
attr_accessor :year
|
130
|
+
def initialize(options = {})
|
131
|
+
@year = options[:year]
|
132
|
+
end
|
133
|
+
|
134
|
+
def add_hints!(bus)
|
135
|
+
# File will decide format based on filename
|
136
|
+
end
|
137
|
+
|
138
|
+
def apply(row)
|
139
|
+
row.merge!({
|
140
|
+
'make' => row['Manufacturer'], # make it line up with the errata
|
141
|
+
'model' => row['carline name'], # ditto
|
142
|
+
'drive' => row['drv'] + 'WD',
|
143
|
+
'transmission' => TRANSMISSIONS[row['trans'][-3, 1]],
|
144
|
+
'speeds' => (row['trans'][-2, 1] == 'V') ? 'variable' : row['trans'][-2, 1],
|
145
|
+
'turbo' => row['T'] == 'T',
|
146
|
+
'supercharger' => row['S'] == 'S',
|
147
|
+
'injection' => true,
|
148
|
+
'year' => year
|
149
|
+
})
|
150
|
+
row
|
151
|
+
end
|
152
|
+
end
|
153
|
+
class ParserD
|
154
|
+
attr_accessor :year
|
155
|
+
def initialize(options = {})
|
156
|
+
@year = options[:year]
|
157
|
+
end
|
158
|
+
|
159
|
+
def add_hints!(bus)
|
160
|
+
bus[:reject] = lambda { |row| row.values.first.blank? } if year == 2007
|
161
|
+
end
|
162
|
+
|
163
|
+
def apply(row)
|
164
|
+
row.merge!({
|
165
|
+
'make' => row['MFR'], # make it line up with the errata
|
166
|
+
'model' => row['CAR LINE'], # ditto
|
167
|
+
'drive' => row['DRIVE SYS'] + 'WD',
|
168
|
+
'transmission' => TRANSMISSIONS[row['TRANS'][-3, 1]],
|
169
|
+
'speeds' => (row['TRANS'][-2, 1] == 'V') ? 'variable' : row['TRANS'][-2, 1],
|
170
|
+
'turbo' => row['TURBO'] == 'T',
|
171
|
+
'supercharger' => row['SPCHGR'] == 'S',
|
172
|
+
'injection' => true,
|
173
|
+
'year' => year
|
174
|
+
})
|
175
|
+
row
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
class Guru
|
181
|
+
# the following matching methods are needed by the errata
|
182
|
+
# per https://brighterplanet.sifterapp.com/projects/30/issues/750/comments
|
183
|
+
|
184
|
+
def transmission_is_blank?(row)
|
185
|
+
row['transmission'].blank?
|
186
|
+
end
|
187
|
+
|
188
|
+
def is_a_2007_gmc_or_chevrolet?(row)
|
189
|
+
row['year'] == 2007 and %w(GMC CHEVROLET).include? row['MFR'].upcase
|
190
|
+
end
|
191
|
+
|
192
|
+
def is_a_porsche?(row)
|
193
|
+
row['make'].upcase == 'PORSCHE'
|
194
|
+
end
|
195
|
+
|
196
|
+
def is_not_a_porsche?(row)
|
197
|
+
!is_a_porsche? row
|
198
|
+
end
|
199
|
+
|
200
|
+
def is_a_mercedes_benz?(row)
|
201
|
+
row['make'] =~ /MERCEDES/i
|
202
|
+
end
|
203
|
+
|
204
|
+
def is_a_lexus?(row)
|
205
|
+
row['make'].upcase == 'LEXUS'
|
206
|
+
end
|
207
|
+
|
208
|
+
def is_a_bmw?(row)
|
209
|
+
row['make'].upcase == 'BMW'
|
210
|
+
end
|
211
|
+
|
212
|
+
def is_a_ford?(row)
|
213
|
+
row['make'].upcase == 'FORD'
|
214
|
+
end
|
215
|
+
|
216
|
+
def is_a_rolls_royce_and_model_contains_bentley?(row)
|
217
|
+
is_a_rolls_royce?(row) and model_contains_bentley?(row)
|
218
|
+
end
|
219
|
+
|
220
|
+
def is_a_bentley?(row)
|
221
|
+
row['make'].upcase == 'BENTLEY'
|
222
|
+
end
|
223
|
+
|
224
|
+
def is_a_rolls_royce?(row)
|
225
|
+
row['make'] =~ /ROLLS/i
|
226
|
+
end
|
227
|
+
|
228
|
+
def is_a_turbo_brooklands?(row)
|
229
|
+
row['model'] =~ /TURBO R\/RL BKLDS/i
|
230
|
+
end
|
231
|
+
|
232
|
+
def model_contains_maybach?(row)
|
233
|
+
row['model'] =~ /MAYBACH/i
|
234
|
+
end
|
235
|
+
|
236
|
+
def model_contains_bentley?(row)
|
237
|
+
row['model'] =~ /BENTLEY/i
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
241
|
+
errata = Errata.new :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv',
|
242
|
+
:responder => AutomobileVariant::Guru.new
|
243
|
+
|
244
|
+
data_miner do
|
245
|
+
# 1985---1997
|
246
|
+
(85..97).each do |yy|
|
247
|
+
filename = (yy == 96) ? "#{yy}MFGUI.ASC" : "#{yy}MFGUI.DAT"
|
248
|
+
import(:url => "http://www.fueleconomy.gov/FEG/epadata/#{yy}mfgui.zip",
|
249
|
+
:filename => filename,
|
250
|
+
:transform => { :class => FuelEconomyGuide::ParserB, :year => "19#{yy}".to_i },
|
251
|
+
:errata => errata) do
|
252
|
+
key 'row_hash'
|
253
|
+
store 'make_name', :field_name => 'make'
|
254
|
+
store 'model_name', :field_name => 'model'
|
255
|
+
store 'year'
|
256
|
+
store 'fuel_type_code', :field_name => 'fuel_type'
|
257
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
258
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
259
|
+
store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
260
|
+
store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
261
|
+
store 'cylinders', :field_name => 'no_cyc'
|
262
|
+
store 'drive', :field_name => 'drive_system'
|
263
|
+
store 'carline_mfr_code'
|
264
|
+
store 'vi_mfr_code'
|
265
|
+
store 'carline_code'
|
266
|
+
store 'carline_class_code', :field_name => 'carline_clss'
|
267
|
+
store 'transmission'
|
268
|
+
store 'speeds'
|
269
|
+
store 'turbo'
|
270
|
+
store 'supercharger'
|
271
|
+
store 'injection'
|
272
|
+
store 'displacement'
|
273
|
+
end
|
274
|
+
end
|
275
|
+
|
276
|
+
# 1998--2005
|
277
|
+
{
|
278
|
+
1998 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv' },
|
279
|
+
1999 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/99guide.zip', :filename => '99guide6.csv' },
|
280
|
+
2000 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls' },
|
281
|
+
2001 => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/01guide0918.csv' }, # parseexcel 0.5.2 can't read Excel 5.0 { :url => 'http://www.fueleconomy.gov/FEG/epadata/01data.zip', :filename => '01guide0918.xls' }
|
282
|
+
2002 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls' },
|
283
|
+
2003 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/03data.zip', :filename => 'guide_2003_feb04-03b.csv' },
|
284
|
+
2004 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/04data.zip', :filename => 'gd04-Feb1804-RelDtFeb20.csv' },
|
285
|
+
2005 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/05data.zip', :filename => 'guide2005-2004oct15.csv' }
|
286
|
+
}.sort { |a, b| a.first <=> b.first }.each do |year, options|
|
287
|
+
import options.merge(:transform => { :class => FuelEconomyGuide::ParserC, :year => year },
|
288
|
+
:errata => errata) do
|
289
|
+
key 'row_hash'
|
290
|
+
store 'make_name', :field_name => 'make'
|
291
|
+
store 'model_name', :field_name => 'model'
|
292
|
+
store 'fuel_type_code', :field_name => 'fl'
|
293
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
294
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
295
|
+
store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
296
|
+
store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
297
|
+
store 'cylinders', :field_name => 'cyl'
|
298
|
+
store 'displacement', :field_name => 'displ'
|
299
|
+
store 'carline_class_code', :field_name => 'cls' if year >= 2000
|
300
|
+
store 'carline_class_name', :field_name => 'Class'
|
301
|
+
store 'year'
|
302
|
+
store 'transmission'
|
303
|
+
store 'speeds'
|
304
|
+
store 'turbo'
|
305
|
+
store 'supercharger'
|
306
|
+
store 'injection'
|
307
|
+
store 'drive'
|
308
|
+
end
|
309
|
+
end
|
310
|
+
|
311
|
+
# 2006--2010
|
312
|
+
{
|
313
|
+
2006 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/06data.zip', :filename => '2006_FE_Guide_14-Nov-2005_download.csv' },
|
314
|
+
2007 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/07data.zip', :filename => '2007_FE_guide_ALL_no_sales_May_01_2007.xls' },
|
315
|
+
2008 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv' },
|
316
|
+
2009 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/09data.zip', :filename => '2009_FE_guide for DOE_ALL-rel dates-no-sales-8-28-08download.csv' },
|
317
|
+
# 2010 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/10data.zip', :filename => '2010FEguide-for DOE-rel dates before 10-16-09-no-sales10-8-09public.xls' }
|
318
|
+
}.sort { |a, b| a.first <=> b.first }.each do |year, options|
|
319
|
+
import options.merge(:transform => { :class => FuelEconomyGuide::ParserD, :year => year },
|
320
|
+
:errata => errata) do
|
321
|
+
key 'row_hash'
|
322
|
+
store 'make_name', :field_name => 'make'
|
323
|
+
store 'model_name', :field_name => 'model'
|
324
|
+
store 'fuel_type_code', :field_name => 'FUEL TYPE'
|
325
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
326
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
327
|
+
store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
328
|
+
store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
329
|
+
store 'cylinders', :field_name => 'NUMB CYL'
|
330
|
+
store 'displacement', :field_name => 'DISPLACEMENT'
|
331
|
+
store 'carline_class_code', :field_name => 'CLS'
|
332
|
+
store 'carline_class_name', :field_name => 'CLASS'
|
333
|
+
store 'year'
|
334
|
+
store 'transmission'
|
335
|
+
store 'speeds'
|
336
|
+
store 'turbo'
|
337
|
+
store 'supercharger'
|
338
|
+
store 'injection'
|
339
|
+
store 'drive'
|
340
|
+
end
|
341
|
+
end
|
342
|
+
|
343
|
+
# associate :make, :key => :original_automobile_make_name, :foreign_key => :name
|
344
|
+
# derive :automobile_model_id # creates models by name
|
345
|
+
# associate :fuel_type, :key => :original_automobile_fuel_type_code, :foreign_key => :code
|
346
|
+
|
347
|
+
process 'Set adjusted fuel economy' do
|
348
|
+
update_all 'fuel_efficiency_city = 1 / ((0.003259 / 0.425143707) + (1.1805 / raw_fuel_efficiency_city))'
|
349
|
+
update_all 'fuel_efficiency_highway = 1 / ((0.001376 / 0.425143707) + (1.3466 / raw_fuel_efficiency_highway))'
|
350
|
+
end
|
351
|
+
end
|
352
|
+
|
353
|
+
def name
|
354
|
+
extra = []
|
355
|
+
extra << "V#{cylinders}" if cylinders
|
356
|
+
extra << "#{displacement}L" if displacement
|
357
|
+
extra << "turbo" if turbo
|
358
|
+
extra << "FI" if injection
|
359
|
+
extra << "#{speeds}spd" if speeds.present?
|
360
|
+
extra << transmission if transmission.present?
|
361
|
+
extra << "(#{fuel_type.name})" if fuel_type
|
362
|
+
extra.join(' ')
|
363
|
+
end
|
364
|
+
|
365
|
+
def fuel_economy_description
|
366
|
+
[ fuel_efficiency_city, fuel_efficiency_highway ].map { |f| f.kilometres_per_litre.to(:miles_per_gallon).round }.join('/')
|
367
|
+
end
|
368
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
class Country < ActiveRecord::Base
|
2
|
+
set_primary_key :iso_3166
|
3
|
+
|
4
|
+
data_miner do
|
5
|
+
import 'The official ISO country list', :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do
|
6
|
+
key 'iso_3166', :field_number => 1
|
7
|
+
store 'name', :field_number => 0
|
8
|
+
end
|
9
|
+
|
10
|
+
import 'A Princeton dataset with better capitalization', :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do
|
11
|
+
key 'iso_3166', :field_name => 'country code'
|
12
|
+
store 'name', :field_name => 'country'
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/test/test_helper.rb
CHANGED
@@ -6,6 +6,8 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
6
6
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
7
7
|
require 'data_miner'
|
8
8
|
|
9
|
+
ENV['WIP'] = 'true' if ENV['ALL'] == 'true' # ENV only accepts String values; assigning the boolean true raises TypeError
|
10
|
+
|
9
11
|
ActiveRecord::Base.establish_connection(
|
10
12
|
'adapter' => 'mysql',
|
11
13
|
'database' => 'data_miner_test',
|
@@ -13,11 +15,12 @@ ActiveRecord::Base.establish_connection(
|
|
13
15
|
'password' => 'password'
|
14
16
|
)
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
+
Dir.glob(File.expand_path('support/*.rb', File.dirname(__FILE__))).each do |lib|
|
19
|
+
require lib
|
18
20
|
end
|
19
21
|
|
20
|
-
|
22
|
+
ActiveSupport::Inflector.inflections do |inflect|
|
23
|
+
inflect.uncountable %w{ aircraft aircraft_deux census_division_deux census_division_trois }
|
21
24
|
end
|
22
25
|
|
23
26
|
ActiveRecord::Schema.define(:version => 20090819143429) do
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 5
|
8
|
-
- 5
|
9
|
-
version: 0.5.5
|
8
|
+
- 6
|
9
|
+
version: 0.5.6
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2010-
|
19
|
+
date: 2010-11-01 00:00:00 -04:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
@@ -270,6 +270,12 @@ files:
|
|
270
270
|
- lib/data_miner/run.rb
|
271
271
|
- lib/data_miner/schema.rb
|
272
272
|
- lib/data_miner/tap.rb
|
273
|
+
- test/data_miner/attribute_test.rb
|
274
|
+
- test/support/airport.rb
|
275
|
+
- test/support/country.rb
|
276
|
+
- test/support/automobile_fuel_type.rb
|
277
|
+
- test/support/aircraft.rb
|
278
|
+
- test/support/automobile_variant.rb
|
273
279
|
- test/data_miner_test.rb
|
274
280
|
- test/test_helper.rb
|
275
281
|
has_rdoc: true
|
@@ -307,5 +313,11 @@ signing_key:
|
|
307
313
|
specification_version: 3
|
308
314
|
summary: Mine remote data into your ActiveRecord models.
|
309
315
|
test_files:
|
316
|
+
- test/data_miner/attribute_test.rb
|
317
|
+
- test/support/airport.rb
|
318
|
+
- test/support/country.rb
|
319
|
+
- test/support/automobile_fuel_type.rb
|
320
|
+
- test/support/aircraft.rb
|
321
|
+
- test/support/automobile_variant.rb
|
310
322
|
- test/data_miner_test.rb
|
311
323
|
- test/test_helper.rb
|