data_miner 0.5.5 → 0.5.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/data_miner/attribute.rb +2 -1
- data/test/data_miner/attribute_test.rb +55 -0
- data/test/data_miner_test.rb +13 -539
- data/test/support/aircraft.rb +99 -0
- data/test/support/airport.rb +14 -0
- data/test/support/automobile_fuel_type.rb +40 -0
- data/test/support/automobile_variant.rb +368 -0
- data/test/support/country.rb +15 -0
- data/test/test_helper.rb +6 -3
- metadata +15 -3
data/lib/data_miner/attribute.rb
CHANGED
@@ -84,6 +84,7 @@ module DataMiner
|
|
84
84
|
return value if value.is_a? ActiveRecord::Base # carry through trapdoor
|
85
85
|
value = value_in_dictionary value if wants_dictionary?
|
86
86
|
value = synthesize.call(row) if wants_synthesize?
|
87
|
+
value = nil if value.blank? and wants_nullification?
|
87
88
|
value
|
88
89
|
end
|
89
90
|
|
@@ -150,7 +151,7 @@ module DataMiner
|
|
150
151
|
options.has_key? :static
|
151
152
|
end
|
152
153
|
def wants_nullification?
|
153
|
-
nullify
|
154
|
+
nullify == true
|
154
155
|
end
|
155
156
|
def wants_chars?
|
156
157
|
chars.present?
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class AttributeTest < Test::Unit::TestCase
|
4
|
+
context '#value_from_row' do
|
5
|
+
setup do
|
6
|
+
@airport = Airport.new
|
7
|
+
end
|
8
|
+
context 'nullify is true' do
|
9
|
+
setup do
|
10
|
+
@attribute = DataMiner::Attribute.new @airport, 'latitude', :nullify => true
|
11
|
+
end
|
12
|
+
should 'return nil if field is blank' do
|
13
|
+
assert_nil @attribute.value_from_row(
|
14
|
+
'name' => 'DTW',
|
15
|
+
'city' => 'Warren',
|
16
|
+
'country_name' => 'US',
|
17
|
+
'latitude' => '',
|
18
|
+
'longitude' => ''
|
19
|
+
)
|
20
|
+
end
|
21
|
+
should 'return the value if field is not blank' do
|
22
|
+
assert_equal '12.34', @attribute.value_from_row(
|
23
|
+
'name' => 'DTW',
|
24
|
+
'city' => 'Warren',
|
25
|
+
'country_name' => 'US',
|
26
|
+
'latitude' => '12.34',
|
27
|
+
'longitude' => ''
|
28
|
+
)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
context 'nullify is false' do
|
32
|
+
setup do
|
33
|
+
@attribute = DataMiner::Attribute.new @airport, 'latitude'
|
34
|
+
end
|
35
|
+
should 'return the value if field is not blank' do
|
36
|
+
assert_equal '12.34', @attribute.value_from_row(
|
37
|
+
'name' => 'DTW',
|
38
|
+
'city' => 'Warren',
|
39
|
+
'country_name' => 'US',
|
40
|
+
'latitude' => '12.34',
|
41
|
+
'longitude' => ''
|
42
|
+
)
|
43
|
+
end
|
44
|
+
should 'return blank if field is blank' do
|
45
|
+
assert_equal '', @attribute.value_from_row(
|
46
|
+
'name' => 'DTW',
|
47
|
+
'city' => 'Warren',
|
48
|
+
'country_name' => 'US',
|
49
|
+
'latitude' => '',
|
50
|
+
'longitude' => ''
|
51
|
+
)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/test/data_miner_test.rb
CHANGED
@@ -1,446 +1,5 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
|
3
|
-
class AutomobileFuelType < ActiveRecord::Base
|
4
|
-
set_primary_key :code
|
5
|
-
|
6
|
-
data_miner do
|
7
|
-
import(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
|
8
|
-
:filename => 'Gd6-dsc.txt',
|
9
|
-
:format => :fixed_width,
|
10
|
-
:crop => 21..26, # inclusive
|
11
|
-
:cut => '2-',
|
12
|
-
:select => lambda { |row| /\A[A-Z]/.match row[:code] },
|
13
|
-
:schema => [[ 'code', 2, { :type => :string } ],
|
14
|
-
[ 'spacer', 2 ],
|
15
|
-
[ 'name', 52, { :type => :string } ]]) do
|
16
|
-
key 'code'
|
17
|
-
store 'name'
|
18
|
-
end
|
19
|
-
|
20
|
-
import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/models_export/automobile_fuel_type.csv' do
|
21
|
-
key 'code'
|
22
|
-
store 'name'
|
23
|
-
store 'annual_distance'
|
24
|
-
store 'emission_factor'
|
25
|
-
end
|
26
|
-
|
27
|
-
# pull electricity emission factor from residential electricity
|
28
|
-
import(:url => 'http://spreadsheets.google.com/pub?key=rukxnmuhhsOsrztTrUaFCXQ',
|
29
|
-
:select => lambda { |row| row['code'] == 'El' }) do
|
30
|
-
key 'code'
|
31
|
-
store 'name'
|
32
|
-
store 'emission_factor'
|
33
|
-
end
|
34
|
-
|
35
|
-
# still need distance estimate for electric cars
|
36
|
-
end
|
37
|
-
|
38
|
-
CODES = {
|
39
|
-
:electricity => 'El',
|
40
|
-
:diesel => 'D'
|
41
|
-
}
|
42
|
-
end
|
43
|
-
|
44
|
-
class AutomobileVariant < ActiveRecord::Base
|
45
|
-
set_primary_key :row_hash
|
46
|
-
|
47
|
-
module FuelEconomyGuide
|
48
|
-
TRANSMISSIONS = {
|
49
|
-
'A' => 'automatic',
|
50
|
-
'M' => 'manual',
|
51
|
-
'L' => 'automatic', # Lockup/automatic
|
52
|
-
'S' => 'semiautomatic', # Semiautomatic
|
53
|
-
'C' => 'manual' # TODO verify for VW Syncro
|
54
|
-
}
|
55
|
-
|
56
|
-
ENGINE_TYPES = {
|
57
|
-
'(GUZZLER)' => nil, # "gas guzzler"
|
58
|
-
'(POLICE)' => nil, # police automobile_variant
|
59
|
-
'(MPFI)' => 'injection',
|
60
|
-
'(MPI*)' => 'injection',
|
61
|
-
'(SPFI)' => 'injection',
|
62
|
-
'(FFS)' => 'injection',
|
63
|
-
'(TURBO)' => 'turbo',
|
64
|
-
'(TRBO)' => 'turbo',
|
65
|
-
'(TC*)' => 'turbo',
|
66
|
-
'(FFS,TRBO)' => %w(injection turbo),
|
67
|
-
'(S-CHARGE)' => 'supercharger',
|
68
|
-
'(SC*)' => 'supercharger',
|
69
|
-
'(DIESEL)' => nil, # diesel
|
70
|
-
'(DSL)' => nil, # diesel
|
71
|
-
'(ROTARY)' => nil, # rotary
|
72
|
-
'(VARIABLE)' => nil, # variable displacement
|
73
|
-
'(NO-CAT)' => nil, # no catalytic converter
|
74
|
-
'(OHC)' => nil, # overhead camshaft
|
75
|
-
'(OHV)' => nil, # overhead valves
|
76
|
-
'(16-VALVE)' => nil, # 16V
|
77
|
-
'(305)' => nil, # 305 cubic inch displacement
|
78
|
-
'(307)' => nil, # 307 cubic inch displacement
|
79
|
-
'(M-ENG)' => nil,
|
80
|
-
'(W-ENG)' => nil,
|
81
|
-
'(GM-BUICK)' => nil,
|
82
|
-
'(GM-CHEV)' => nil,
|
83
|
-
'(GM-OLDS)' => nil,
|
84
|
-
'(GM-PONT)' => nil,
|
85
|
-
}
|
86
|
-
|
87
|
-
class ParserB
|
88
|
-
attr_accessor :year
|
89
|
-
def initialize(options = {})
|
90
|
-
@year = options[:year]
|
91
|
-
end
|
92
|
-
|
93
|
-
def apply(row)
|
94
|
-
row.merge!({
|
95
|
-
'make' => row['carline_mfr_name'], # make it line up with the errata
|
96
|
-
'model' => row['carline_name'], # ditto
|
97
|
-
'transmission' => TRANSMISSIONS[row['model_trans'][0, 1]],
|
98
|
-
'speeds' => (row['model_trans'][1, 1] == 'V') ? 'variable' : row['model_trans'][1, 1],
|
99
|
-
'turbo' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('turbo'),
|
100
|
-
'supercharger' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('supercharger'),
|
101
|
-
'injection' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('injection'),
|
102
|
-
'displacement' => _displacement(row['opt_disp']),
|
103
|
-
'year' => year
|
104
|
-
})
|
105
|
-
row
|
106
|
-
end
|
107
|
-
|
108
|
-
def _displacement(str)
|
109
|
-
str = str.gsub(/[\(\)]/, '').strip
|
110
|
-
if str =~ /^(.+)L$/
|
111
|
-
$1.to_f
|
112
|
-
elsif str =~ /^(.+)CC$/
|
113
|
-
$1.to_f / 1000
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
def add_hints!(bus)
|
118
|
-
bus[:format] = :fixed_width
|
119
|
-
bus[:cut] = '13-' if year == 1995
|
120
|
-
bus[:schema_name] = :fuel_economy_guide_b
|
121
|
-
bus[:select] = lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' }
|
122
|
-
Slither.define :fuel_economy_guide_b do |d|
|
123
|
-
d.rows do |row|
|
124
|
-
row.trap { true } # there's only one section
|
125
|
-
row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR
|
126
|
-
row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA
|
127
|
-
row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE
|
128
|
-
row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE
|
129
|
-
row.column 'carline_name' , 28, :type => :string # CARLINE NAME
|
130
|
-
row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES
|
131
|
-
row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
|
132
|
-
row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE
|
133
|
-
row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS
|
134
|
-
row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
|
135
|
-
row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
|
136
|
-
row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE
|
137
|
-
row.column 'carline_code' , 5, :type => :integer # CARLINE CODE
|
138
|
-
row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX
|
139
|
-
row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME
|
140
|
-
row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
|
141
|
-
row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
|
142
|
-
row.spacer 2
|
143
|
-
row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
|
144
|
-
row.spacer 2
|
145
|
-
row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON
|
146
|
-
row.spacer 2
|
147
|
-
row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON
|
148
|
-
row.spacer 2
|
149
|
-
row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
|
150
|
-
row.spacer 2
|
151
|
-
row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
|
152
|
-
row.spacer 2
|
153
|
-
row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
|
154
|
-
row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT
|
155
|
-
row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1
|
156
|
-
row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2
|
157
|
-
row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3
|
158
|
-
row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
159
|
-
row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
160
|
-
row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
161
|
-
row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY
|
162
|
-
row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
|
163
|
-
row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
|
164
|
-
row.column 'filler' , 1, :type => :string # NOT USED
|
165
|
-
row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
|
166
|
-
row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS
|
167
|
-
end
|
168
|
-
end
|
169
|
-
end
|
170
|
-
end
|
171
|
-
class ParserC
|
172
|
-
attr_accessor :year
|
173
|
-
def initialize(options = {})
|
174
|
-
@year = options[:year]
|
175
|
-
end
|
176
|
-
|
177
|
-
def add_hints!(bus)
|
178
|
-
# File will decide format based on filename
|
179
|
-
end
|
180
|
-
|
181
|
-
def apply(row)
|
182
|
-
row.merge!({
|
183
|
-
'make' => row['Manufacturer'], # make it line up with the errata
|
184
|
-
'model' => row['carline name'], # ditto
|
185
|
-
'drive' => row['drv'] + 'WD',
|
186
|
-
'transmission' => TRANSMISSIONS[row['trans'][-3, 1]],
|
187
|
-
'speeds' => (row['trans'][-2, 1] == 'V') ? 'variable' : row['trans'][-2, 1],
|
188
|
-
'turbo' => row['T'] == 'T',
|
189
|
-
'supercharger' => row['S'] == 'S',
|
190
|
-
'injection' => true,
|
191
|
-
'year' => year
|
192
|
-
})
|
193
|
-
row
|
194
|
-
end
|
195
|
-
end
|
196
|
-
class ParserD
|
197
|
-
attr_accessor :year
|
198
|
-
def initialize(options = {})
|
199
|
-
@year = options[:year]
|
200
|
-
end
|
201
|
-
|
202
|
-
def add_hints!(bus)
|
203
|
-
bus[:reject] = lambda { |row| row.values.first.blank? } if year == 2007
|
204
|
-
end
|
205
|
-
|
206
|
-
def apply(row)
|
207
|
-
row.merge!({
|
208
|
-
'make' => row['MFR'], # make it line up with the errata
|
209
|
-
'model' => row['CAR LINE'], # ditto
|
210
|
-
'drive' => row['DRIVE SYS'] + 'WD',
|
211
|
-
'transmission' => TRANSMISSIONS[row['TRANS'][-3, 1]],
|
212
|
-
'speeds' => (row['TRANS'][-2, 1] == 'V') ? 'variable' : row['TRANS'][-2, 1],
|
213
|
-
'turbo' => row['TURBO'] == 'T',
|
214
|
-
'supercharger' => row['SPCHGR'] == 'S',
|
215
|
-
'injection' => true,
|
216
|
-
'year' => year
|
217
|
-
})
|
218
|
-
row
|
219
|
-
end
|
220
|
-
end
|
221
|
-
end
|
222
|
-
|
223
|
-
class Guru
|
224
|
-
# the following matching methods are needed by the errata
|
225
|
-
# per https://brighterplanet.sifterapp.com/projects/30/issues/750/comments
|
226
|
-
|
227
|
-
def transmission_is_blank?(row)
|
228
|
-
row['transmission'].blank?
|
229
|
-
end
|
230
|
-
|
231
|
-
def is_a_2007_gmc_or_chevrolet?(row)
|
232
|
-
row['year'] == 2007 and %w(GMC CHEVROLET).include? row['MFR'].upcase
|
233
|
-
end
|
234
|
-
|
235
|
-
def is_a_porsche?(row)
|
236
|
-
row['make'].upcase == 'PORSCHE'
|
237
|
-
end
|
238
|
-
|
239
|
-
def is_not_a_porsche?(row)
|
240
|
-
!is_a_porsche? row
|
241
|
-
end
|
242
|
-
|
243
|
-
def is_a_mercedes_benz?(row)
|
244
|
-
row['make'] =~ /MERCEDES/i
|
245
|
-
end
|
246
|
-
|
247
|
-
def is_a_lexus?(row)
|
248
|
-
row['make'].upcase == 'LEXUS'
|
249
|
-
end
|
250
|
-
|
251
|
-
def is_a_bmw?(row)
|
252
|
-
row['make'].upcase == 'BMW'
|
253
|
-
end
|
254
|
-
|
255
|
-
def is_a_ford?(row)
|
256
|
-
row['make'].upcase == 'FORD'
|
257
|
-
end
|
258
|
-
|
259
|
-
def is_a_rolls_royce_and_model_contains_bentley?(row)
|
260
|
-
is_a_rolls_royce?(row) and model_contains_bentley?(row)
|
261
|
-
end
|
262
|
-
|
263
|
-
def is_a_bentley?(row)
|
264
|
-
row['make'].upcase == 'BENTLEY'
|
265
|
-
end
|
266
|
-
|
267
|
-
def is_a_rolls_royce?(row)
|
268
|
-
row['make'] =~ /ROLLS/i
|
269
|
-
end
|
270
|
-
|
271
|
-
def is_a_turbo_brooklands?(row)
|
272
|
-
row['model'] =~ /TURBO R\/RL BKLDS/i
|
273
|
-
end
|
274
|
-
|
275
|
-
def model_contains_maybach?(row)
|
276
|
-
row['model'] =~ /MAYBACH/i
|
277
|
-
end
|
278
|
-
|
279
|
-
def model_contains_bentley?(row)
|
280
|
-
row['model'] =~ /BENTLEY/i
|
281
|
-
end
|
282
|
-
end
|
283
|
-
|
284
|
-
errata = Errata.new :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv',
|
285
|
-
:responder => AutomobileVariant::Guru.new
|
286
|
-
|
287
|
-
data_miner do
|
288
|
-
# 1985---1997
|
289
|
-
(85..97).each do |yy|
|
290
|
-
filename = (yy == 96) ? "#{yy}MFGUI.ASC" : "#{yy}MFGUI.DAT"
|
291
|
-
import(:url => "http://www.fueleconomy.gov/FEG/epadata/#{yy}mfgui.zip",
|
292
|
-
:filename => filename,
|
293
|
-
:transform => { :class => FuelEconomyGuide::ParserB, :year => "19#{yy}".to_i },
|
294
|
-
:errata => errata) do
|
295
|
-
key 'row_hash'
|
296
|
-
store 'make_name', :field_name => 'make'
|
297
|
-
store 'model_name', :field_name => 'model'
|
298
|
-
store 'year'
|
299
|
-
store 'fuel_type_code', :field_name => 'fuel_type'
|
300
|
-
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
301
|
-
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
302
|
-
store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
303
|
-
store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
304
|
-
store 'cylinders', :field_name => 'no_cyc'
|
305
|
-
store 'drive', :field_name => 'drive_system'
|
306
|
-
store 'carline_mfr_code'
|
307
|
-
store 'vi_mfr_code'
|
308
|
-
store 'carline_code'
|
309
|
-
store 'carline_class_code', :field_name => 'carline_clss'
|
310
|
-
store 'transmission'
|
311
|
-
store 'speeds'
|
312
|
-
store 'turbo'
|
313
|
-
store 'supercharger'
|
314
|
-
store 'injection'
|
315
|
-
store 'displacement'
|
316
|
-
end
|
317
|
-
end
|
318
|
-
|
319
|
-
# 1998--2005
|
320
|
-
{
|
321
|
-
1998 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv' },
|
322
|
-
1999 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/99guide.zip', :filename => '99guide6.csv' },
|
323
|
-
2000 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls' },
|
324
|
-
2001 => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/01guide0918.csv' }, # parseexcel 0.5.2 can't read Excel 5.0 { :url => 'http://www.fueleconomy.gov/FEG/epadata/01data.zip', :filename => '01guide0918.xls' }
|
325
|
-
2002 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls' },
|
326
|
-
2003 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/03data.zip', :filename => 'guide_2003_feb04-03b.csv' },
|
327
|
-
2004 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/04data.zip', :filename => 'gd04-Feb1804-RelDtFeb20.csv' },
|
328
|
-
2005 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/05data.zip', :filename => 'guide2005-2004oct15.csv' }
|
329
|
-
}.sort { |a, b| a.first <=> b.first }.each do |year, options|
|
330
|
-
import options.merge(:transform => { :class => FuelEconomyGuide::ParserC, :year => year },
|
331
|
-
:errata => errata) do
|
332
|
-
key 'row_hash'
|
333
|
-
store 'make_name', :field_name => 'make'
|
334
|
-
store 'model_name', :field_name => 'model'
|
335
|
-
store 'fuel_type_code', :field_name => 'fl'
|
336
|
-
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
337
|
-
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
338
|
-
store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
339
|
-
store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
340
|
-
store 'cylinders', :field_name => 'cyl'
|
341
|
-
store 'displacement', :field_name => 'displ'
|
342
|
-
store 'carline_class_code', :field_name => 'cls' if year >= 2000
|
343
|
-
store 'carline_class_name', :field_name => 'Class'
|
344
|
-
store 'year'
|
345
|
-
store 'transmission'
|
346
|
-
store 'speeds'
|
347
|
-
store 'turbo'
|
348
|
-
store 'supercharger'
|
349
|
-
store 'injection'
|
350
|
-
store 'drive'
|
351
|
-
end
|
352
|
-
end
|
353
|
-
|
354
|
-
# 2006--2010
|
355
|
-
{
|
356
|
-
2006 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/06data.zip', :filename => '2006_FE_Guide_14-Nov-2005_download.csv' },
|
357
|
-
2007 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/07data.zip', :filename => '2007_FE_guide_ALL_no_sales_May_01_2007.xls' },
|
358
|
-
2008 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv' },
|
359
|
-
2009 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/09data.zip', :filename => '2009_FE_guide for DOE_ALL-rel dates-no-sales-8-28-08download.csv' },
|
360
|
-
# 2010 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/10data.zip', :filename => '2010FEguide-for DOE-rel dates before 10-16-09-no-sales10-8-09public.xls' }
|
361
|
-
}.sort { |a, b| a.first <=> b.first }.each do |year, options|
|
362
|
-
import options.merge(:transform => { :class => FuelEconomyGuide::ParserD, :year => year },
|
363
|
-
:errata => errata) do
|
364
|
-
key 'row_hash'
|
365
|
-
store 'make_name', :field_name => 'make'
|
366
|
-
store 'model_name', :field_name => 'model'
|
367
|
-
store 'fuel_type_code', :field_name => 'FUEL TYPE'
|
368
|
-
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
369
|
-
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
370
|
-
store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
371
|
-
store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
372
|
-
store 'cylinders', :field_name => 'NUMB CYL'
|
373
|
-
store 'displacement', :field_name => 'DISPLACEMENT'
|
374
|
-
store 'carline_class_code', :field_name => 'CLS'
|
375
|
-
store 'carline_class_name', :field_name => 'CLASS'
|
376
|
-
store 'year'
|
377
|
-
store 'transmission'
|
378
|
-
store 'speeds'
|
379
|
-
store 'turbo'
|
380
|
-
store 'supercharger'
|
381
|
-
store 'injection'
|
382
|
-
store 'drive'
|
383
|
-
end
|
384
|
-
end
|
385
|
-
|
386
|
-
# associate :make, :key => :original_automobile_make_name, :foreign_key => :name
|
387
|
-
# derive :automobile_model_id # creates models by name
|
388
|
-
# associate :fuel_type, :key => :original_automobile_fuel_type_code, :foreign_key => :code
|
389
|
-
|
390
|
-
process 'Set adjusted fuel economy' do
|
391
|
-
update_all 'fuel_efficiency_city = 1 / ((0.003259 / 0.425143707) + (1.1805 / raw_fuel_efficiency_city))'
|
392
|
-
update_all 'fuel_efficiency_highway = 1 / ((0.001376 / 0.425143707) + (1.3466 / raw_fuel_efficiency_highway))'
|
393
|
-
end
|
394
|
-
end
|
395
|
-
|
396
|
-
def name
|
397
|
-
extra = []
|
398
|
-
extra << "V#{cylinders}" if cylinders
|
399
|
-
extra << "#{displacement}L" if displacement
|
400
|
-
extra << "turbo" if turbo
|
401
|
-
extra << "FI" if injection
|
402
|
-
extra << "#{speeds}spd" if speeds.present?
|
403
|
-
extra << transmission if transmission.present?
|
404
|
-
extra << "(#{fuel_type.name})" if fuel_type
|
405
|
-
extra.join(' ')
|
406
|
-
end
|
407
|
-
|
408
|
-
def fuel_economy_description
|
409
|
-
[ fuel_efficiency_city, fuel_efficiency_highway ].map { |f| f.kilometres_per_litre.to(:miles_per_gallon).round }.join('/')
|
410
|
-
end
|
411
|
-
end
|
412
|
-
|
413
|
-
class Country < ActiveRecord::Base
|
414
|
-
set_primary_key :iso_3166
|
415
|
-
|
416
|
-
data_miner do
|
417
|
-
import 'The official ISO country list', :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do
|
418
|
-
key 'iso_3166', :field_number => 1
|
419
|
-
store 'name', :field_number => 0
|
420
|
-
end
|
421
|
-
|
422
|
-
import 'A Princeton dataset with better capitalization', :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do
|
423
|
-
key 'iso_3166', :field_name => 'country code'
|
424
|
-
store 'name', :field_name => 'country'
|
425
|
-
end
|
426
|
-
end
|
427
|
-
end
|
428
|
-
|
429
|
-
class Airport < ActiveRecord::Base
|
430
|
-
set_primary_key :iata_code
|
431
|
-
|
432
|
-
data_miner do
|
433
|
-
import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do
|
434
|
-
key 'iata_code', :field_number => 4
|
435
|
-
store 'name', :field_number => 1
|
436
|
-
store 'city', :field_number => 2
|
437
|
-
store 'country_name', :field_number => 3
|
438
|
-
store 'latitude', :field_number => 6
|
439
|
-
store 'longitude', :field_number => 7
|
440
|
-
end
|
441
|
-
end
|
442
|
-
end
|
443
|
-
|
444
3
|
class TappedAirport < ActiveRecord::Base
|
445
4
|
set_primary_key :iata_code
|
446
5
|
|
@@ -947,103 +506,6 @@ class T100FlightSegment < ActiveRecord::Base
|
|
947
506
|
end
|
948
507
|
end
|
949
508
|
|
950
|
-
require 'loose_tight_dictionary'
|
951
|
-
class Aircraft < ActiveRecord::Base
|
952
|
-
set_primary_key :icao_code
|
953
|
-
|
954
|
-
def self.bts_dictionary
|
955
|
-
@_dictionary ||= LooseTightDictionary.new RemoteTable.new(:url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv', :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }),
|
956
|
-
:tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
|
957
|
-
:identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
|
958
|
-
:blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
|
959
|
-
:left_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Model'] },
|
960
|
-
:right_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
|
961
|
-
end
|
962
|
-
|
963
|
-
class BtsAircraftTypeCodeMatcher
|
964
|
-
def match(left_record)
|
965
|
-
right_record = Aircraft.bts_dictionary.left_to_right left_record
|
966
|
-
right_record['Aircraft Type'] if right_record
|
967
|
-
end
|
968
|
-
end
|
969
|
-
|
970
|
-
class BtsNameMatcher
|
971
|
-
def match(left_record)
|
972
|
-
right_record = Aircraft.bts_dictionary.left_to_right left_record
|
973
|
-
right_record['Manufacturer'] + ' ' + right_record['Long Name'] if right_record
|
974
|
-
end
|
975
|
-
end
|
976
|
-
|
977
|
-
class Guru
|
978
|
-
# for errata
|
979
|
-
def is_attributed_to_boeing?(row)
|
980
|
-
row['Manufacturer'] =~ /BOEING/i
|
981
|
-
end
|
982
|
-
|
983
|
-
def is_attributed_to_cessna?(row)
|
984
|
-
row['Manufacturer'] =~ /CESSNA/i
|
985
|
-
end
|
986
|
-
|
987
|
-
def is_attributed_to_fokker?(row)
|
988
|
-
row['Manufacturer'] =~ /FOKKER/i
|
989
|
-
end
|
990
|
-
|
991
|
-
def is_not_attributed_to_aerospatiale?(row)
|
992
|
-
not row['Manufacturer'] =~ /AEROSPATIALE/i
|
993
|
-
end
|
994
|
-
|
995
|
-
def is_not_attributed_to_cessna?(row)
|
996
|
-
not row['Manufacturer'] =~ /CESSNA/i
|
997
|
-
end
|
998
|
-
|
999
|
-
def is_not_attributed_to_learjet?(row)
|
1000
|
-
not row['Manufacturer'] =~ /LEAR/i
|
1001
|
-
end
|
1002
|
-
|
1003
|
-
def is_not_attributed_to_dehavilland?(row)
|
1004
|
-
not row['Manufacturer'] =~ /DE ?HAVILLAND/i
|
1005
|
-
end
|
1006
|
-
|
1007
|
-
def is_not_attributed_to_mcdonnell_douglas?(row)
|
1008
|
-
not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
|
1009
|
-
end
|
1010
|
-
|
1011
|
-
def is_not_a_dc_plane?(row)
|
1012
|
-
not row['Model'] =~ /DC/i
|
1013
|
-
end
|
1014
|
-
|
1015
|
-
def is_a_crj_900?(row)
|
1016
|
-
row['Designator'].downcase == 'crj9'
|
1017
|
-
end
|
1018
|
-
end
|
1019
|
-
|
1020
|
-
data_miner do
|
1021
|
-
# ('A'..'Z').each do |letter|
|
1022
|
-
# Note: for the purposes of testing, only importing "D"
|
1023
|
-
%w{ D }.each do |letter|
|
1024
|
-
import("ICAO codes starting with letter #{letter} used by the FAA",
|
1025
|
-
:url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
|
1026
|
-
:encoding => 'US-ASCII',
|
1027
|
-
:errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
|
1028
|
-
:responder => Aircraft::Guru.new),
|
1029
|
-
:row_xpath => '//table/tr[2]/td/table/tr',
|
1030
|
-
:column_xpath => 'td') do
|
1031
|
-
key 'icao_code', :field_name => 'Designator'
|
1032
|
-
store 'bts_name', :matcher => Aircraft::BtsNameMatcher.new
|
1033
|
-
store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher.new
|
1034
|
-
store 'manufacturer_name', :field_name => 'Manufacturer'
|
1035
|
-
store 'name', :field_name => 'Model'
|
1036
|
-
end
|
1037
|
-
|
1038
|
-
import 'Brighter Planet aircraft class codes',
|
1039
|
-
:url => 'http://static.brighterplanet.com/science/data/transport/air/bts_aircraft_type/bts_aircraft_types-brighter_planet_aircraft_classes.csv' do
|
1040
|
-
key 'bts_aircraft_type_code', :field_name => 'bts_aircraft_type'
|
1041
|
-
store 'brighter_planet_aircraft_class_code'
|
1042
|
-
end
|
1043
|
-
end
|
1044
|
-
end
|
1045
|
-
end
|
1046
|
-
|
1047
509
|
# note that this depends on stuff in Aircraft
|
1048
510
|
class AircraftDeux < ActiveRecord::Base
|
1049
511
|
set_primary_key :icao_code
|
@@ -1166,7 +628,19 @@ end
|
|
1166
628
|
|
1167
629
|
# todo: have somebody properly organize these
|
1168
630
|
class DataMinerTest < Test::Unit::TestCase
|
1169
|
-
if ENV['
|
631
|
+
if ENV['WIP']
|
632
|
+
context 'with nullify option' do
|
633
|
+
should 'treat blank fields as null values' do
|
634
|
+
Aircraft.delete_all
|
635
|
+
Aircraft.data_miner_runs.delete_all
|
636
|
+
Aircraft.run_data_miner!
|
637
|
+
assert_greater_than 0, Aircraft.count
|
638
|
+
assert_false Aircraft.where(:brighter_planet_aircraft_class_code => nil).empty?
|
639
|
+
end
|
640
|
+
end
|
641
|
+
end
|
642
|
+
|
643
|
+
if ENV['ALL'] == 'true'
|
1170
644
|
should 'directly create a table for the model' do
|
1171
645
|
if AutomobileMakeFleetYear.table_exists?
|
1172
646
|
ActiveRecord::Base.connection.execute 'DROP TABLE automobile_make_fleet_years;'
|
@@ -0,0 +1,99 @@
|
|
1
|
+
require 'loose_tight_dictionary'
|
2
|
+
|
3
|
+
class Aircraft < ActiveRecord::Base
|
4
|
+
set_primary_key :icao_code
|
5
|
+
set_table_name 'aircraft'
|
6
|
+
|
7
|
+
def self.bts_dictionary
|
8
|
+
@_dictionary ||= LooseTightDictionary.new RemoteTable.new(:url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv', :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }),
|
9
|
+
:tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
|
10
|
+
:identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
|
11
|
+
:blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
|
12
|
+
:left_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Model'] },
|
13
|
+
:right_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
|
14
|
+
end
|
15
|
+
|
16
|
+
class BtsAircraftTypeCodeMatcher
|
17
|
+
def match(left_record)
|
18
|
+
right_record = Aircraft.bts_dictionary.left_to_right left_record
|
19
|
+
right_record['Aircraft Type'] if right_record
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
class BtsNameMatcher
|
24
|
+
def match(left_record)
|
25
|
+
right_record = Aircraft.bts_dictionary.left_to_right left_record
|
26
|
+
right_record['Manufacturer'] + ' ' + right_record['Long Name'] if right_record
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
class Guru
|
31
|
+
# for errata
|
32
|
+
def is_attributed_to_boeing?(row)
|
33
|
+
row['Manufacturer'] =~ /BOEING/i
|
34
|
+
end
|
35
|
+
|
36
|
+
def is_attributed_to_cessna?(row)
|
37
|
+
row['Manufacturer'] =~ /CESSNA/i
|
38
|
+
end
|
39
|
+
|
40
|
+
def is_attributed_to_fokker?(row)
|
41
|
+
row['Manufacturer'] =~ /FOKKER/i
|
42
|
+
end
|
43
|
+
|
44
|
+
def is_not_attributed_to_aerospatiale?(row)
|
45
|
+
not row['Manufacturer'] =~ /AEROSPATIALE/i
|
46
|
+
end
|
47
|
+
|
48
|
+
def is_not_attributed_to_cessna?(row)
|
49
|
+
not row['Manufacturer'] =~ /CESSNA/i
|
50
|
+
end
|
51
|
+
|
52
|
+
def is_not_attributed_to_learjet?(row)
|
53
|
+
not row['Manufacturer'] =~ /LEAR/i
|
54
|
+
end
|
55
|
+
|
56
|
+
def is_not_attributed_to_dehavilland?(row)
|
57
|
+
not row['Manufacturer'] =~ /DE ?HAVILLAND/i
|
58
|
+
end
|
59
|
+
|
60
|
+
def is_not_attributed_to_mcdonnell_douglas?(row)
|
61
|
+
not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
|
62
|
+
end
|
63
|
+
|
64
|
+
def is_not_a_dc_plane?(row)
|
65
|
+
not row['Model'] =~ /DC/i
|
66
|
+
end
|
67
|
+
|
68
|
+
def is_a_crj_900?(row)
|
69
|
+
row['Designator'].downcase == 'crj9'
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
data_miner do
|
74
|
+
# ('A'..'Z').each do |letter|
|
75
|
+
# Note: for the purposes of testing, only importing "D"
|
76
|
+
%w{ D }.each do |letter|
|
77
|
+
import("ICAO codes starting with letter #{letter} used by the FAA",
|
78
|
+
:url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
|
79
|
+
:encoding => 'US-ASCII',
|
80
|
+
:errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
|
81
|
+
:responder => Aircraft::Guru.new),
|
82
|
+
:row_xpath => '//table/tr[2]/td/table/tr',
|
83
|
+
:column_xpath => 'td') do
|
84
|
+
key 'icao_code', :field_name => 'Designator'
|
85
|
+
store 'bts_name', :matcher => Aircraft::BtsNameMatcher.new, :nullify => true
|
86
|
+
store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher.new, :nullify => true
|
87
|
+
store 'manufacturer_name', :field_name => 'Manufacturer', :nullify => true
|
88
|
+
store 'name', :field_name => 'Model', :nullify => true
|
89
|
+
end
|
90
|
+
|
91
|
+
import 'Brighter Planet aircraft class codes',
|
92
|
+
:url => 'http://static.brighterplanet.com/science/data/transport/air/bts_aircraft_type/bts_aircraft_types-brighter_planet_aircraft_classes.csv' do
|
93
|
+
key 'bts_aircraft_type_code', :field_name => 'bts_aircraft_type'
|
94
|
+
store 'brighter_planet_aircraft_class_code', :nullify => true
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
@@ -0,0 +1,14 @@
|
|
1
|
+
class Airport < ActiveRecord::Base
|
2
|
+
set_primary_key :iata_code
|
3
|
+
|
4
|
+
data_miner do
|
5
|
+
import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do
|
6
|
+
key 'iata_code', :field_number => 4
|
7
|
+
store 'name', :field_number => 1
|
8
|
+
store 'city', :field_number => 2
|
9
|
+
store 'country_name', :field_number => 3
|
10
|
+
store 'latitude', :field_number => 6, :nullify => true
|
11
|
+
store 'longitude', :field_number => 7, :nullify => true
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
class AutomobileFuelType < ActiveRecord::Base
|
2
|
+
set_primary_key :code
|
3
|
+
|
4
|
+
data_miner do
|
5
|
+
import(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
|
6
|
+
:filename => 'Gd6-dsc.txt',
|
7
|
+
:format => :fixed_width,
|
8
|
+
:crop => 21..26, # inclusive
|
9
|
+
:cut => '2-',
|
10
|
+
:select => lambda { |row| /\A[A-Z]/.match row[:code] },
|
11
|
+
:schema => [[ 'code', 2, { :type => :string } ],
|
12
|
+
[ 'spacer', 2 ],
|
13
|
+
[ 'name', 52, { :type => :string } ]]) do
|
14
|
+
key 'code'
|
15
|
+
store 'name'
|
16
|
+
end
|
17
|
+
|
18
|
+
import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/models_export/automobile_fuel_type.csv' do
|
19
|
+
key 'code'
|
20
|
+
store 'name'
|
21
|
+
store 'annual_distance'
|
22
|
+
store 'emission_factor'
|
23
|
+
end
|
24
|
+
|
25
|
+
# pull electricity emission factor from residential electricity
|
26
|
+
import(:url => 'http://spreadsheets.google.com/pub?key=rukxnmuhhsOsrztTrUaFCXQ',
|
27
|
+
:select => lambda { |row| row['code'] == 'El' }) do
|
28
|
+
key 'code'
|
29
|
+
store 'name'
|
30
|
+
store 'emission_factor'
|
31
|
+
end
|
32
|
+
|
33
|
+
# still need distance estimate for electric cars
|
34
|
+
end
|
35
|
+
|
36
|
+
CODES = {
|
37
|
+
:electricity => 'El',
|
38
|
+
:diesel => 'D'
|
39
|
+
}
|
40
|
+
end
|
@@ -0,0 +1,368 @@
|
|
1
|
+
class AutomobileVariant < ActiveRecord::Base
|
2
|
+
set_primary_key :row_hash
|
3
|
+
|
4
|
+
module FuelEconomyGuide
|
5
|
+
TRANSMISSIONS = {
|
6
|
+
'A' => 'automatic',
|
7
|
+
'M' => 'manual',
|
8
|
+
'L' => 'automatic', # Lockup/automatic
|
9
|
+
'S' => 'semiautomatic', # Semiautomatic
|
10
|
+
'C' => 'manual' # TODO verify for VW Syncro
|
11
|
+
}
|
12
|
+
|
13
|
+
ENGINE_TYPES = {
|
14
|
+
'(GUZZLER)' => nil, # "gas guzzler"
|
15
|
+
'(POLICE)' => nil, # police automobile_variant
|
16
|
+
'(MPFI)' => 'injection',
|
17
|
+
'(MPI*)' => 'injection',
|
18
|
+
'(SPFI)' => 'injection',
|
19
|
+
'(FFS)' => 'injection',
|
20
|
+
'(TURBO)' => 'turbo',
|
21
|
+
'(TRBO)' => 'turbo',
|
22
|
+
'(TC*)' => 'turbo',
|
23
|
+
'(FFS,TRBO)' => %w(injection turbo),
|
24
|
+
'(S-CHARGE)' => 'supercharger',
|
25
|
+
'(SC*)' => 'supercharger',
|
26
|
+
'(DIESEL)' => nil, # diesel
|
27
|
+
'(DSL)' => nil, # diesel
|
28
|
+
'(ROTARY)' => nil, # rotary
|
29
|
+
'(VARIABLE)' => nil, # variable displacement
|
30
|
+
'(NO-CAT)' => nil, # no catalytic converter
|
31
|
+
'(OHC)' => nil, # overhead camshaft
|
32
|
+
'(OHV)' => nil, # overhead valves
|
33
|
+
'(16-VALVE)' => nil, # 16V
|
34
|
+
'(305)' => nil, # 305 cubic inch displacement
|
35
|
+
'(307)' => nil, # 307 cubic inch displacement
|
36
|
+
'(M-ENG)' => nil,
|
37
|
+
'(W-ENG)' => nil,
|
38
|
+
'(GM-BUICK)' => nil,
|
39
|
+
'(GM-CHEV)' => nil,
|
40
|
+
'(GM-OLDS)' => nil,
|
41
|
+
'(GM-PONT)' => nil,
|
42
|
+
}
|
43
|
+
|
44
|
+
class ParserB
|
45
|
+
attr_accessor :year
|
46
|
+
def initialize(options = {})
|
47
|
+
@year = options[:year]
|
48
|
+
end
|
49
|
+
|
50
|
+
def apply(row)
|
51
|
+
row.merge!({
|
52
|
+
'make' => row['carline_mfr_name'], # make it line up with the errata
|
53
|
+
'model' => row['carline_name'], # ditto
|
54
|
+
'transmission' => TRANSMISSIONS[row['model_trans'][0, 1]],
|
55
|
+
'speeds' => (row['model_trans'][1, 1] == 'V') ? 'variable' : row['model_trans'][1, 1],
|
56
|
+
'turbo' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('turbo'),
|
57
|
+
'supercharger' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('supercharger'),
|
58
|
+
'injection' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('injection'),
|
59
|
+
'displacement' => _displacement(row['opt_disp']),
|
60
|
+
'year' => year
|
61
|
+
})
|
62
|
+
row
|
63
|
+
end
|
64
|
+
|
65
|
+
def _displacement(str)
|
66
|
+
str = str.gsub(/[\(\)]/, '').strip
|
67
|
+
if str =~ /^(.+)L$/
|
68
|
+
$1.to_f
|
69
|
+
elsif str =~ /^(.+)CC$/
|
70
|
+
$1.to_f / 1000
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def add_hints!(bus)
|
75
|
+
bus[:format] = :fixed_width
|
76
|
+
bus[:cut] = '13-' if year == 1995
|
77
|
+
bus[:schema_name] = :fuel_economy_guide_b
|
78
|
+
# Keep only 49-state ('F') rows that carry no suppression code. The key must match the 'suppress_code' column declared in the schema below; to_i.zero? treats nil, '' and 0 alike, since the column is typed :integer.
bus[:select] = lambda { |row| row['suppress_code'].to_i.zero? and row['state_code'] == 'F' }
|
79
|
+
Slither.define :fuel_economy_guide_b do |d|
|
80
|
+
d.rows do |row|
|
81
|
+
row.trap { true } # there's only one section
|
82
|
+
row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR
|
83
|
+
row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA
|
84
|
+
row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE
|
85
|
+
row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE
|
86
|
+
row.column 'carline_name' , 28, :type => :string # CARLINE NAME
|
87
|
+
row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES
|
88
|
+
row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
|
89
|
+
row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE
|
90
|
+
row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS
|
91
|
+
row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
|
92
|
+
row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
|
93
|
+
row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE
|
94
|
+
row.column 'carline_code' , 5, :type => :integer # CARLINE CODE
|
95
|
+
row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX
|
96
|
+
row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME
|
97
|
+
row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
|
98
|
+
row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
|
99
|
+
row.spacer 2
|
100
|
+
row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
|
101
|
+
row.spacer 2
|
102
|
+
row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON
|
103
|
+
row.spacer 2
|
104
|
+
row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON
|
105
|
+
row.spacer 2
|
106
|
+
row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
|
107
|
+
row.spacer 2
|
108
|
+
row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
|
109
|
+
row.spacer 2
|
110
|
+
row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
|
111
|
+
row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT
|
112
|
+
row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1
|
113
|
+
row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2
|
114
|
+
row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3
|
115
|
+
row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
116
|
+
row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
117
|
+
row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
118
|
+
row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY
|
119
|
+
row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
|
120
|
+
row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
|
121
|
+
row.column 'filler' , 1, :type => :string # NOT USED
|
122
|
+
row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
|
123
|
+
row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
class ParserC
|
129
|
+
attr_accessor :year
|
130
|
+
def initialize(options = {})
|
131
|
+
@year = options[:year]
|
132
|
+
end
|
133
|
+
|
134
|
+
def add_hints!(bus)
|
135
|
+
# File will decide format based on filename
|
136
|
+
end
|
137
|
+
|
138
|
+
def apply(row)
|
139
|
+
row.merge!({
|
140
|
+
'make' => row['Manufacturer'], # make it line up with the errata
|
141
|
+
'model' => row['carline name'], # ditto
|
142
|
+
'drive' => row['drv'] + 'WD',
|
143
|
+
'transmission' => TRANSMISSIONS[row['trans'][-3, 1]],
|
144
|
+
'speeds' => (row['trans'][-2, 1] == 'V') ? 'variable' : row['trans'][-2, 1],
|
145
|
+
'turbo' => row['T'] == 'T',
|
146
|
+
'supercharger' => row['S'] == 'S',
|
147
|
+
'injection' => true,
|
148
|
+
'year' => year
|
149
|
+
})
|
150
|
+
row
|
151
|
+
end
|
152
|
+
end
|
153
|
+
class ParserD
|
154
|
+
attr_accessor :year
|
155
|
+
def initialize(options = {})
|
156
|
+
@year = options[:year]
|
157
|
+
end
|
158
|
+
|
159
|
+
def add_hints!(bus)
|
160
|
+
bus[:reject] = lambda { |row| row.values.first.blank? } if year == 2007
|
161
|
+
end
|
162
|
+
|
163
|
+
def apply(row)
|
164
|
+
row.merge!({
|
165
|
+
'make' => row['MFR'], # make it line up with the errata
|
166
|
+
'model' => row['CAR LINE'], # ditto
|
167
|
+
'drive' => row['DRIVE SYS'] + 'WD',
|
168
|
+
'transmission' => TRANSMISSIONS[row['TRANS'][-3, 1]],
|
169
|
+
'speeds' => (row['TRANS'][-2, 1] == 'V') ? 'variable' : row['TRANS'][-2, 1],
|
170
|
+
'turbo' => row['TURBO'] == 'T',
|
171
|
+
'supercharger' => row['SPCHGR'] == 'S',
|
172
|
+
'injection' => true,
|
173
|
+
'year' => year
|
174
|
+
})
|
175
|
+
row
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
class Guru
|
181
|
+
# the following matching methods are needed by the errata
|
182
|
+
# per https://brighterplanet.sifterapp.com/projects/30/issues/750/comments
|
183
|
+
|
184
|
+
def transmission_is_blank?(row)
|
185
|
+
row['transmission'].blank?
|
186
|
+
end
|
187
|
+
|
188
|
+
def is_a_2007_gmc_or_chevrolet?(row)
|
189
|
+
row['year'] == 2007 and %w(GMC CHEVROLET).include? row['MFR'].upcase
|
190
|
+
end
|
191
|
+
|
192
|
+
def is_a_porsche?(row)
|
193
|
+
row['make'].upcase == 'PORSCHE'
|
194
|
+
end
|
195
|
+
|
196
|
+
def is_not_a_porsche?(row)
|
197
|
+
!is_a_porsche? row
|
198
|
+
end
|
199
|
+
|
200
|
+
def is_a_mercedes_benz?(row)
|
201
|
+
row['make'] =~ /MERCEDES/i
|
202
|
+
end
|
203
|
+
|
204
|
+
def is_a_lexus?(row)
|
205
|
+
row['make'].upcase == 'LEXUS'
|
206
|
+
end
|
207
|
+
|
208
|
+
def is_a_bmw?(row)
|
209
|
+
row['make'].upcase == 'BMW'
|
210
|
+
end
|
211
|
+
|
212
|
+
def is_a_ford?(row)
|
213
|
+
row['make'].upcase == 'FORD'
|
214
|
+
end
|
215
|
+
|
216
|
+
def is_a_rolls_royce_and_model_contains_bentley?(row)
|
217
|
+
is_a_rolls_royce?(row) and model_contains_bentley?(row)
|
218
|
+
end
|
219
|
+
|
220
|
+
def is_a_bentley?(row)
|
221
|
+
row['make'].upcase == 'BENTLEY'
|
222
|
+
end
|
223
|
+
|
224
|
+
def is_a_rolls_royce?(row)
|
225
|
+
row['make'] =~ /ROLLS/i
|
226
|
+
end
|
227
|
+
|
228
|
+
def is_a_turbo_brooklands?(row)
|
229
|
+
row['model'] =~ /TURBO R\/RL BKLDS/i
|
230
|
+
end
|
231
|
+
|
232
|
+
def model_contains_maybach?(row)
|
233
|
+
row['model'] =~ /MAYBACH/i
|
234
|
+
end
|
235
|
+
|
236
|
+
def model_contains_bentley?(row)
|
237
|
+
row['model'] =~ /BENTLEY/i
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
241
|
+
errata = Errata.new :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv',
|
242
|
+
:responder => AutomobileVariant::Guru.new
|
243
|
+
|
244
|
+
data_miner do
|
245
|
+
# 1985---1997
|
246
|
+
(85..97).each do |yy|
|
247
|
+
filename = (yy == 96) ? "#{yy}MFGUI.ASC" : "#{yy}MFGUI.DAT"
|
248
|
+
import(:url => "http://www.fueleconomy.gov/FEG/epadata/#{yy}mfgui.zip",
|
249
|
+
:filename => filename,
|
250
|
+
:transform => { :class => FuelEconomyGuide::ParserB, :year => "19#{yy}".to_i },
|
251
|
+
:errata => errata) do
|
252
|
+
key 'row_hash'
|
253
|
+
store 'make_name', :field_name => 'make'
|
254
|
+
store 'model_name', :field_name => 'model'
|
255
|
+
store 'year'
|
256
|
+
store 'fuel_type_code', :field_name => 'fuel_type'
|
257
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
258
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
259
|
+
store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
260
|
+
store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
261
|
+
store 'cylinders', :field_name => 'no_cyc'
|
262
|
+
store 'drive', :field_name => 'drive_system'
|
263
|
+
store 'carline_mfr_code'
|
264
|
+
store 'vi_mfr_code'
|
265
|
+
store 'carline_code'
|
266
|
+
store 'carline_class_code', :field_name => 'carline_clss'
|
267
|
+
store 'transmission'
|
268
|
+
store 'speeds'
|
269
|
+
store 'turbo'
|
270
|
+
store 'supercharger'
|
271
|
+
store 'injection'
|
272
|
+
store 'displacement'
|
273
|
+
end
|
274
|
+
end
|
275
|
+
|
276
|
+
# 1998--2005
|
277
|
+
{
|
278
|
+
1998 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv' },
|
279
|
+
1999 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/99guide.zip', :filename => '99guide6.csv' },
|
280
|
+
2000 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls' },
|
281
|
+
2001 => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/01guide0918.csv' }, # parseexcel 0.5.2 can't read Excel 5.0 { :url => 'http://www.fueleconomy.gov/FEG/epadata/01data.zip', :filename => '01guide0918.xls' }
|
282
|
+
2002 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls' },
|
283
|
+
2003 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/03data.zip', :filename => 'guide_2003_feb04-03b.csv' },
|
284
|
+
2004 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/04data.zip', :filename => 'gd04-Feb1804-RelDtFeb20.csv' },
|
285
|
+
2005 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/05data.zip', :filename => 'guide2005-2004oct15.csv' }
|
286
|
+
}.sort { |a, b| a.first <=> b.first }.each do |year, options|
|
287
|
+
import options.merge(:transform => { :class => FuelEconomyGuide::ParserC, :year => year },
|
288
|
+
:errata => errata) do
|
289
|
+
key 'row_hash'
|
290
|
+
store 'make_name', :field_name => 'make'
|
291
|
+
store 'model_name', :field_name => 'model'
|
292
|
+
store 'fuel_type_code', :field_name => 'fl'
|
293
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
294
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
295
|
+
store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
296
|
+
store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
297
|
+
store 'cylinders', :field_name => 'cyl'
|
298
|
+
store 'displacement', :field_name => 'displ'
|
299
|
+
store 'carline_class_code', :field_name => 'cls' if year >= 2000
|
300
|
+
store 'carline_class_name', :field_name => 'Class'
|
301
|
+
store 'year'
|
302
|
+
store 'transmission'
|
303
|
+
store 'speeds'
|
304
|
+
store 'turbo'
|
305
|
+
store 'supercharger'
|
306
|
+
store 'injection'
|
307
|
+
store 'drive'
|
308
|
+
end
|
309
|
+
end
|
310
|
+
|
311
|
+
# 2006--2010
|
312
|
+
{
|
313
|
+
2006 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/06data.zip', :filename => '2006_FE_Guide_14-Nov-2005_download.csv' },
|
314
|
+
2007 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/07data.zip', :filename => '2007_FE_guide_ALL_no_sales_May_01_2007.xls' },
|
315
|
+
2008 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv' },
|
316
|
+
2009 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/09data.zip', :filename => '2009_FE_guide for DOE_ALL-rel dates-no-sales-8-28-08download.csv' },
|
317
|
+
# 2010 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/10data.zip', :filename => '2010FEguide-for DOE-rel dates before 10-16-09-no-sales10-8-09public.xls' }
|
318
|
+
}.sort { |a, b| a.first <=> b.first }.each do |year, options|
|
319
|
+
import options.merge(:transform => { :class => FuelEconomyGuide::ParserD, :year => year },
|
320
|
+
:errata => errata) do
|
321
|
+
key 'row_hash'
|
322
|
+
store 'make_name', :field_name => 'make'
|
323
|
+
store 'model_name', :field_name => 'model'
|
324
|
+
store 'fuel_type_code', :field_name => 'FUEL TYPE'
|
325
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
326
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
327
|
+
store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
328
|
+
store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
329
|
+
store 'cylinders', :field_name => 'NUMB CYL'
|
330
|
+
store 'displacement', :field_name => 'DISPLACEMENT'
|
331
|
+
store 'carline_class_code', :field_name => 'CLS'
|
332
|
+
store 'carline_class_name', :field_name => 'CLASS'
|
333
|
+
store 'year'
|
334
|
+
store 'transmission'
|
335
|
+
store 'speeds'
|
336
|
+
store 'turbo'
|
337
|
+
store 'supercharger'
|
338
|
+
store 'injection'
|
339
|
+
store 'drive'
|
340
|
+
end
|
341
|
+
end
|
342
|
+
|
343
|
+
# associate :make, :key => :original_automobile_make_name, :foreign_key => :name
|
344
|
+
# derive :automobile_model_id # creates models by name
|
345
|
+
# associate :fuel_type, :key => :original_automobile_fuel_type_code, :foreign_key => :code
|
346
|
+
|
347
|
+
process 'Set adjusted fuel economy' do
|
348
|
+
update_all 'fuel_efficiency_city = 1 / ((0.003259 / 0.425143707) + (1.1805 / raw_fuel_efficiency_city))'
|
349
|
+
update_all 'fuel_efficiency_highway = 1 / ((0.001376 / 0.425143707) + (1.3466 / raw_fuel_efficiency_highway))'
|
350
|
+
end
|
351
|
+
end
|
352
|
+
|
353
|
+
def name
|
354
|
+
extra = []
|
355
|
+
extra << "V#{cylinders}" if cylinders
|
356
|
+
extra << "#{displacement}L" if displacement
|
357
|
+
extra << "turbo" if turbo
|
358
|
+
extra << "FI" if injection
|
359
|
+
extra << "#{speeds}spd" if speeds.present?
|
360
|
+
extra << transmission if transmission.present?
|
361
|
+
extra << "(#{fuel_type.name})" if fuel_type
|
362
|
+
extra.join(' ')
|
363
|
+
end
|
364
|
+
|
365
|
+
def fuel_economy_description
|
366
|
+
[ fuel_efficiency_city, fuel_efficiency_highway ].map { |f| f.kilometres_per_litre.to(:miles_per_gallon).round }.join('/')
|
367
|
+
end
|
368
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
class Country < ActiveRecord::Base
|
2
|
+
set_primary_key :iso_3166
|
3
|
+
|
4
|
+
data_miner do
|
5
|
+
import 'The official ISO country list', :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do
|
6
|
+
key 'iso_3166', :field_number => 1
|
7
|
+
store 'name', :field_number => 0
|
8
|
+
end
|
9
|
+
|
10
|
+
import 'A Princeton dataset with better capitalization', :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do
|
11
|
+
key 'iso_3166', :field_name => 'country code'
|
12
|
+
store 'name', :field_name => 'country'
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/test/test_helper.rb
CHANGED
@@ -6,6 +6,8 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
6
6
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
7
7
|
require 'data_miner'
|
8
8
|
|
9
|
+
# ENV only stores Strings; assigning the boolean true raises TypeError, so use the string 'true'.
ENV['WIP'] = 'true' if ENV['ALL'] == 'true'
|
10
|
+
|
9
11
|
ActiveRecord::Base.establish_connection(
|
10
12
|
'adapter' => 'mysql',
|
11
13
|
'database' => 'data_miner_test',
|
@@ -13,11 +15,12 @@ ActiveRecord::Base.establish_connection(
|
|
13
15
|
'password' => 'password'
|
14
16
|
)
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
+
Dir.glob(File.expand_path('support/*.rb', File.dirname(__FILE__))).each do |lib|
|
19
|
+
require lib
|
18
20
|
end
|
19
21
|
|
20
|
-
|
22
|
+
ActiveSupport::Inflector.inflections do |inflect|
|
23
|
+
inflect.uncountable %w{ aircraft aircraft_deux census_division_deux census_division_trois }
|
21
24
|
end
|
22
25
|
|
23
26
|
ActiveRecord::Schema.define(:version => 20090819143429) do
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 5
|
8
|
-
- 5
|
9
|
-
version: 0.5.5
|
8
|
+
- 6
|
9
|
+
version: 0.5.6
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2010-
|
19
|
+
date: 2010-11-01 00:00:00 -04:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
@@ -270,6 +270,12 @@ files:
|
|
270
270
|
- lib/data_miner/run.rb
|
271
271
|
- lib/data_miner/schema.rb
|
272
272
|
- lib/data_miner/tap.rb
|
273
|
+
- test/data_miner/attribute_test.rb
|
274
|
+
- test/support/airport.rb
|
275
|
+
- test/support/country.rb
|
276
|
+
- test/support/automobile_fuel_type.rb
|
277
|
+
- test/support/aircraft.rb
|
278
|
+
- test/support/automobile_variant.rb
|
273
279
|
- test/data_miner_test.rb
|
274
280
|
- test/test_helper.rb
|
275
281
|
has_rdoc: true
|
@@ -307,5 +313,11 @@ signing_key:
|
|
307
313
|
specification_version: 3
|
308
314
|
summary: Mine remote data into your ActiveRecord models.
|
309
315
|
test_files:
|
316
|
+
- test/data_miner/attribute_test.rb
|
317
|
+
- test/support/airport.rb
|
318
|
+
- test/support/country.rb
|
319
|
+
- test/support/automobile_fuel_type.rb
|
320
|
+
- test/support/aircraft.rb
|
321
|
+
- test/support/automobile_variant.rb
|
310
322
|
- test/data_miner_test.rb
|
311
323
|
- test/test_helper.rb
|