data_miner 1.3.8 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/CHANGELOG +42 -0
  2. data/Gemfile +19 -3
  3. data/README.rdoc +3 -3
  4. data/Rakefile +13 -15
  5. data/data_miner.gemspec +4 -15
  6. data/lib/data_miner.rb +69 -70
  7. data/lib/data_miner/active_record_extensions.rb +17 -22
  8. data/lib/data_miner/attribute.rb +176 -179
  9. data/lib/data_miner/dictionary.rb +38 -31
  10. data/lib/data_miner/run.rb +49 -18
  11. data/lib/data_miner/script.rb +116 -0
  12. data/lib/data_miner/step.rb +5 -0
  13. data/lib/data_miner/step/import.rb +74 -0
  14. data/lib/data_miner/step/process.rb +34 -0
  15. data/lib/data_miner/step/tap.rb +134 -0
  16. data/lib/data_miner/version.rb +1 -1
  17. data/test/helper.rb +26 -24
  18. data/test/support/breeds.xls +0 -0
  19. data/test/support/pet_color_dictionary.en.csv +5 -0
  20. data/test/support/pet_color_dictionary.es.csv +5 -0
  21. data/test/support/pets.csv +5 -0
  22. data/test/support/pets_funny.csv +4 -0
  23. data/test/test_data_miner.rb +103 -0
  24. data/test/test_earth_import.rb +25 -0
  25. data/test/test_earth_tap.rb +25 -0
  26. data/test/test_safety.rb +43 -0
  27. metadata +72 -78
  28. data/.document +0 -5
  29. data/lib/data_miner/config.rb +0 -124
  30. data/lib/data_miner/import.rb +0 -93
  31. data/lib/data_miner/process.rb +0 -38
  32. data/lib/data_miner/tap.rb +0 -143
  33. data/test/support/aircraft.rb +0 -102
  34. data/test/support/airport.rb +0 -16
  35. data/test/support/automobile_fuel_type.rb +0 -40
  36. data/test/support/automobile_variant.rb +0 -362
  37. data/test/support/country.rb +0 -15
  38. data/test/support/test_database.rb +0 -311
  39. data/test/test_data_miner_attribute.rb +0 -111
  40. data/test/test_data_miner_process.rb +0 -18
  41. data/test/test_old_syntax.rb +0 -825
  42. data/test/test_tap.rb +0 -21
@@ -1,16 +0,0 @@
1
- class Airport < ActiveRecord::Base
2
- set_primary_key :iata_code
3
-
4
- data_miner do
5
- import :url => 'https://openflights.svn.sourceforge.net/svnroot/openflights/openflights/data/airports.dat',
6
- :headers => false,
7
- :select => lambda { |row| row[4].present? } do
8
- key 'iata_code', :field_number => 4
9
- store 'name', :field_number => 1
10
- store 'city', :field_number => 2
11
- store 'country_name', :field_number => 3
12
- store 'latitude', :field_number => 6, :nullify => true
13
- store 'longitude', :field_number => 7, :nullify => true
14
- end
15
- end
16
- end
@@ -1,40 +0,0 @@
1
- class AutomobileFuelType < ActiveRecord::Base
2
- set_primary_key :code
3
-
4
- data_miner do
5
- import(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
6
- :filename => 'Gd6-dsc.txt',
7
- :format => :fixed_width,
8
- :crop => 21..26, # inclusive
9
- :cut => '2-',
10
- :select => lambda { |row| /\A[A-Z]/.match row[:code] },
11
- :schema => [[ 'code', 2, { :type => :string } ],
12
- [ 'spacer', 2 ],
13
- [ 'name', 52, { :type => :string } ]]) do
14
- key 'code'
15
- store 'name'
16
- end
17
-
18
- import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/models_export/automobile_fuel_type.csv' do
19
- key 'code'
20
- store 'name'
21
- store 'annual_distance'
22
- store 'emission_factor'
23
- end
24
-
25
- # pull electricity emission factor from residential electricity
26
- import(:url => 'http://spreadsheets.google.com/pub?key=rukxnmuhhsOsrztTrUaFCXQ',
27
- :select => lambda { |row| row['code'] == 'El' }) do
28
- key 'code'
29
- store 'name'
30
- store 'emission_factor'
31
- end
32
-
33
- # still need distance estimate for electric cars
34
- end
35
-
36
- CODES = {
37
- :electricity => 'El',
38
- :diesel => 'D'
39
- }
40
- end
@@ -1,362 +0,0 @@
1
- class AutomobileVariant < ActiveRecord::Base
2
- set_primary_key :row_hash
3
-
4
- module FuelEconomyGuide
5
- TRANSMISSIONS = {
6
- 'A' => 'automatic',
7
- 'M' => 'manual',
8
- 'L' => 'automatic', # Lockup/automatic
9
- 'S' => 'semiautomatic', # Semiautomatic
10
- 'C' => 'manual' # TODO verify for VW Syncro
11
- }
12
-
13
- ENGINE_TYPES = {
14
- '(GUZZLER)' => nil, # "gas guzzler"
15
- '(POLICE)' => nil, # police automobile_variant
16
- '(MPFI)' => 'injection',
17
- '(MPI*)' => 'injection',
18
- '(SPFI)' => 'injection',
19
- '(FFS)' => 'injection',
20
- '(TURBO)' => 'turbo',
21
- '(TRBO)' => 'turbo',
22
- '(TC*)' => 'turbo',
23
- '(FFS,TRBO)' => %w(injection turbo),
24
- '(S-CHARGE)' => 'supercharger',
25
- '(SC*)' => 'supercharger',
26
- '(DIESEL)' => nil, # diesel
27
- '(DSL)' => nil, # diesel
28
- '(ROTARY)' => nil, # rotary
29
- '(VARIABLE)' => nil, # variable displacement
30
- '(NO-CAT)' => nil, # no catalytic converter
31
- '(OHC)' => nil, # overhead camshaft
32
- '(OHV)' => nil, # overhead valves
33
- '(16-VALVE)' => nil, # 16V
34
- '(305)' => nil, # 305 cubic inch displacement
35
- '(307)' => nil, # 307 cubic inch displacement
36
- '(M-ENG)' => nil,
37
- '(W-ENG)' => nil,
38
- '(GM-BUICK)' => nil,
39
- '(GM-CHEV)' => nil,
40
- '(GM-OLDS)' => nil,
41
- '(GM-PONT)' => nil,
42
- }
43
-
44
- class ParserB
45
- require 'fixed_width'
46
- ::FixedWidth.define :fuel_economy_guide_b do |d|
47
- d.rows do |row|
48
- row.trap { true } # there's only one section
49
- row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR
50
- row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA
51
- row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE
52
- row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE
53
- row.column 'carline_name' , 28, :type => :string # CARLINE NAME
54
- row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES
55
- row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
56
- row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE
57
- row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS
58
- row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
59
- row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
60
- row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE
61
- row.column 'carline_code' , 5, :type => :integer # CARLINE CODE
62
- row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX
63
- row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME
64
- row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
65
- row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
66
- row.spacer 2
67
- row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
68
- row.spacer 2
69
- row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON
70
- row.spacer 2
71
- row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON
72
- row.spacer 2
73
- row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
74
- row.spacer 2
75
- row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
76
- row.spacer 2
77
- row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
78
- row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT
79
- row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1
80
- row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2
81
- row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3
82
- row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
83
- row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
84
- row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
85
- row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY
86
- row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
87
- row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
88
- row.column 'filler' , 1, :type => :string # NOT USED
89
- row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
90
- row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS
91
- end
92
- end
93
- attr_accessor :year
94
- def initialize(options = {})
95
- options = options.stringify_keys
96
- @year = options['year']
97
- end
98
-
99
- def apply(row)
100
- row.merge!({
101
- 'make' => row['carline_mfr_name'], # make it line up with the errata
102
- 'model' => row['carline_name'], # ditto
103
- 'transmission' => TRANSMISSIONS[row['model_trans'][0, 1]],
104
- 'speeds' => (row['model_trans'][1, 1] == 'V') ? 'variable' : row['model_trans'][1, 1],
105
- 'turbo' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('turbo'),
106
- 'supercharger' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('supercharger'),
107
- 'injection' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('injection'),
108
- 'displacement' => _displacement(row['opt_disp']),
109
- 'year' => year
110
- })
111
- row
112
- end
113
-
114
- def _displacement(str)
115
- str = str.gsub(/[\(\)]/, '').strip
116
- if str =~ /^(.+)L$/
117
- $1.to_f
118
- elsif str =~ /^(.+)CC$/
119
- $1.to_f / 1000
120
- end
121
- end
122
-
123
- end
124
- class ParserC
125
- attr_accessor :year
126
- def initialize(options = {})
127
- options = options.stringify_keys
128
- @year = options['year']
129
- end
130
-
131
- def apply(row)
132
- row.merge!({
133
- 'make' => row['Manufacturer'], # make it line up with the errata
134
- 'model' => row['carline name'], # ditto
135
- 'drive' => row['drv'] + 'WD',
136
- 'transmission' => TRANSMISSIONS[row['trans'][-3, 1]],
137
- 'speeds' => (row['trans'][-2, 1] == 'V') ? 'variable' : row['trans'][-2, 1],
138
- 'turbo' => row['T'] == 'T',
139
- 'supercharger' => row['S'] == 'S',
140
- 'injection' => true,
141
- 'year' => year
142
- })
143
- row
144
- end
145
- end
146
- class ParserD
147
- attr_accessor :year
148
- def initialize(options = {})
149
- options = options.stringify_keys
150
- @year = options['year']
151
- end
152
-
153
- def apply(row)
154
- row.merge!({
155
- 'make' => row['MFR'], # make it line up with the errata
156
- 'model' => row['CAR LINE'], # ditto
157
- 'drive' => row['DRIVE SYS'] + 'WD',
158
- 'transmission' => TRANSMISSIONS[row['TRANS'][-3, 1]],
159
- 'speeds' => (row['TRANS'][-2, 1] == 'V') ? 'variable' : row['TRANS'][-2, 1],
160
- 'turbo' => row['TURBO'] == 'T',
161
- 'supercharger' => row['SPCHGR'] == 'S',
162
- 'injection' => true,
163
- 'year' => year
164
- })
165
- row
166
- end
167
- end
168
- end
169
-
170
- class Guru
171
- # the following matching methods are needed by the errata
172
- # per https://brighterplanet.sifterapp.com/projects/30/issues/750/comments
173
-
174
- def transmission_is_blank?(row)
175
- row['transmission'].blank?
176
- end
177
-
178
- def is_a_2007_gmc_or_chevrolet?(row)
179
- row['year'] == 2007 and %w(GMC CHEVROLET).include? row['MFR'].upcase
180
- end
181
-
182
- def is_a_porsche?(row)
183
- row['make'].upcase == 'PORSCHE'
184
- end
185
-
186
- def is_not_a_porsche?(row)
187
- !is_a_porsche? row
188
- end
189
-
190
- def is_a_mercedes_benz?(row)
191
- row['make'] =~ /MERCEDES/i
192
- end
193
-
194
- def is_a_lexus?(row)
195
- row['make'].upcase == 'LEXUS'
196
- end
197
-
198
- def is_a_bmw?(row)
199
- row['make'].upcase == 'BMW'
200
- end
201
-
202
- def is_a_ford?(row)
203
- row['make'].upcase == 'FORD'
204
- end
205
-
206
- def is_a_rolls_royce_and_model_contains_bentley?(row)
207
- is_a_rolls_royce?(row) and model_contains_bentley?(row)
208
- end
209
-
210
- def is_a_bentley?(row)
211
- row['make'].upcase == 'BENTLEY'
212
- end
213
-
214
- def is_a_rolls_royce?(row)
215
- row['make'] =~ /ROLLS/i
216
- end
217
-
218
- def is_a_turbo_brooklands?(row)
219
- row['model'] =~ /TURBO R\/RL BKLDS/i
220
- end
221
-
222
- def model_contains_maybach?(row)
223
- row['model'] =~ /MAYBACH/i
224
- end
225
-
226
- def model_contains_bentley?(row)
227
- row['model'] =~ /BENTLEY/i
228
- end
229
- end
230
-
231
- errata = { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv', :responder => 'AutomobileVariant::Guru' }
232
-
233
- data_miner do
234
- # 1985---1997
235
- (85..97).each do |yy|
236
- filename = (yy == 96) ? "#{yy}MFGUI.ASC" : "#{yy}MFGUI.DAT"
237
- import(:url => "http://www.fueleconomy.gov/FEG/epadata/#{yy}mfgui.zip",
238
- :filename => filename,
239
- :transform => { :class => FuelEconomyGuide::ParserB, :year => "19#{yy}".to_i },
240
- :format => :fixed_width,
241
- :cut => (yy == 95) ? '13-' : nil,
242
- :schema_name => :fuel_economy_guide_b,
243
- :select => lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' },
244
- :errata => errata) do
245
- key 'row_hash'
246
- store 'make_name', :field_name => 'make'
247
- store 'model_name', :field_name => 'model'
248
- store 'year'
249
- store 'fuel_type_code', :field_name => 'fuel_type'
250
- store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
251
- store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
252
- store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
253
- store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
254
- store 'cylinders', :field_name => 'no_cyc'
255
- store 'drive', :field_name => 'drive_system'
256
- store 'carline_mfr_code'
257
- store 'vi_mfr_code'
258
- store 'carline_code'
259
- store 'carline_class_code', :field_name => 'carline_clss'
260
- store 'transmission'
261
- store 'speeds'
262
- store 'turbo'
263
- store 'supercharger'
264
- store 'injection'
265
- store 'displacement'
266
- end
267
- end
268
-
269
- # 1998--2005
270
- {
271
- 1998 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv' },
272
- 1999 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/99guide.zip', :filename => '99guide6.csv' },
273
- 2000 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls' },
274
- 2001 => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/01guide0918.csv' }, # parseexcel 0.5.2 can't read Excel 5.0 { :url => 'http://www.fueleconomy.gov/FEG/epadata/01data.zip', :filename => '01guide0918.xls' }
275
- 2002 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls' },
276
- 2003 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/03data.zip', :filename => 'guide_2003_feb04-03b.csv' },
277
- 2004 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/04data.zip', :filename => 'gd04-Feb1804-RelDtFeb20.csv' },
278
- 2005 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/05data.zip', :filename => 'guide2005-2004oct15.csv' }
279
- }.sort { |a, b| a.first <=> b.first }.each do |year, options|
280
- import options.merge(:transform => { :class => FuelEconomyGuide::ParserC, :year => year },
281
- :errata => errata) do
282
- key 'row_hash'
283
- store 'make_name', :field_name => 'make'
284
- store 'model_name', :field_name => 'model'
285
- store 'fuel_type_code', :field_name => 'fl'
286
- store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
287
- store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
288
- store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
289
- store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
290
- store 'cylinders', :field_name => 'cyl'
291
- store 'displacement', :field_name => 'displ'
292
- store 'carline_class_code', :field_name => 'cls' if year >= 2000
293
- store 'carline_class_name', :field_name => 'Class'
294
- store 'year'
295
- store 'transmission'
296
- store 'speeds'
297
- store 'turbo'
298
- store 'supercharger'
299
- store 'injection'
300
- store 'drive'
301
- end
302
- end
303
-
304
- # 2006--2010
305
- {
306
- 2006 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/06data.zip', :filename => '2006_FE_Guide_14-Nov-2005_download.csv' },
307
- 2007 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/07data.zip', :filename => '2007_FE_guide_ALL_no_sales_May_01_2007.xls' },
308
- 2008 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv' },
309
- 2009 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/09data.zip', :filename => '2009_FE_guide for DOE_ALL-rel dates-no-sales-8-28-08download.csv' },
310
- # 2010 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/10data.zip', :filename => '2010FEguide-for DOE-rel dates before 10-16-09-no-sales10-8-09public.xls' }
311
- }.sort { |a, b| a.first <=> b.first }.each do |year, options|
312
- import options.merge(:transform => { :class => FuelEconomyGuide::ParserD, :year => year },
313
- :reject => (year == 2007) ? lambda { |row| row.values.first.blank? } : nil,
314
- :errata => errata) do
315
- key 'row_hash'
316
- store 'make_name', :field_name => 'make'
317
- store 'model_name', :field_name => 'model'
318
- store 'fuel_type_code', :field_name => 'FUEL TYPE'
319
- store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
320
- store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
321
- store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
322
- store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
323
- store 'cylinders', :field_name => 'NUMB CYL'
324
- store 'displacement', :field_name => 'DISPLACEMENT'
325
- store 'carline_class_code', :field_name => 'CLS'
326
- store 'carline_class_name', :field_name => 'CLASS'
327
- store 'year'
328
- store 'transmission'
329
- store 'speeds'
330
- store 'turbo'
331
- store 'supercharger'
332
- store 'injection'
333
- store 'drive'
334
- end
335
- end
336
-
337
- # associate :make, :key => :original_automobile_make_name, :foreign_key => :name
338
- # derive :automobile_model_id # creates models by name
339
- # associate :fuel_type, :key => :original_automobile_fuel_type_code, :foreign_key => :code
340
-
341
- process 'Set adjusted fuel economy' do
342
- update_all 'fuel_efficiency_city = 1 / ((0.003259 / 0.425143707) + (1.1805 / raw_fuel_efficiency_city))'
343
- update_all 'fuel_efficiency_highway = 1 / ((0.001376 / 0.425143707) + (1.3466 / raw_fuel_efficiency_highway))'
344
- end
345
- end
346
-
347
- def name
348
- extra = []
349
- extra << "V#{cylinders}" if cylinders
350
- extra << "#{displacement}L" if displacement
351
- extra << "turbo" if turbo
352
- extra << "FI" if injection
353
- extra << "#{speeds}spd" if speeds.present?
354
- extra << transmission if transmission.present?
355
- extra << "(#{fuel_type.name})" if fuel_type
356
- extra.join(' ')
357
- end
358
-
359
- def fuel_economy_description
360
- [ fuel_efficiency_city, fuel_efficiency_highway ].map { |f| f.kilometres_per_litre.to(:miles_per_gallon).round }.join('/')
361
- end
362
- end
@@ -1,15 +0,0 @@
1
- class Country < ActiveRecord::Base
2
- set_primary_key :iso_3166
3
-
4
- data_miner do
5
- import 'The official ISO country list', :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :encoding => 'ISO-8859-1', :skip => 2, :headers => false, :delimiter => ';' do
6
- key 'iso_3166', :field_number => 1
7
- store 'name', :field_number => 0
8
- end
9
-
10
- import 'A Princeton dataset with better capitalization', :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do
11
- key 'iso_3166', :field_name => 'country code'
12
- store 'name', :field_name => 'country'
13
- end
14
- end
15
- end
@@ -1,311 +0,0 @@
1
- module TestDatabase
2
- extend self
3
-
4
- def connect
5
- @connection ||= ActiveRecord::Base.establish_connection(
6
- 'adapter' => 'mysql',
7
- 'database' => 'data_miner_test',
8
- 'username' => 'root',
9
- 'password' => 'password'
10
- )
11
- end
12
-
13
- def load_schema
14
- connect
15
-
16
- ActiveRecord::Schema.define(:version => 20090819143429) do
17
- create_table "t100_flight_segments", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
18
- t.integer "departures_performed"
19
- t.integer "payload"
20
- t.integer "seats"
21
- t.integer "passengers"
22
- t.integer "freight"
23
- t.integer "mail"
24
- t.integer "ramp_to_ramp"
25
- t.integer "air_time"
26
- t.float "load_factor"
27
- t.float "freight_share"
28
- t.integer "distance"
29
- t.integer "departures_scheduled"
30
- t.string "unique_carrier"
31
- t.integer "dot_airline_id"
32
- t.string "unique_carrier_name"
33
- t.string "unique_carrier_entity"
34
- t.string "region"
35
- t.string "carrier"
36
- t.string "carrier_name"
37
- t.integer "carrier_group"
38
- t.integer "carrier_group_new"
39
- t.string "origin_airport_iata"
40
- t.string "origin_city_name"
41
- t.integer "origin_city_num"
42
- t.string "origin_state_abr"
43
- t.string "origin_state_fips"
44
- t.string "origin_state_nm"
45
- t.string "origin_country_iso_3166"
46
- t.string "origin_country_name"
47
- t.integer "origin_wac"
48
- t.string "dest_airport_iata"
49
- t.string "dest_city_name"
50
- t.integer "dest_city_num"
51
- t.string "dest_state_abr"
52
- t.string "dest_state_fips"
53
- t.string "dest_state_nm"
54
- t.string "dest_country_iso_3166"
55
- t.string "dest_country_name"
56
- t.integer "dest_wac"
57
- t.integer "bts_aircraft_group"
58
- t.integer "bts_aircraft_type"
59
- t.integer "bts_aircraft_config"
60
- t.integer "year"
61
- t.integer "quarter"
62
- t.integer "month"
63
- t.integer "bts_distance_group"
64
- t.string "bts_service_class"
65
- t.string "data_source"
66
- t.float "seats_per_departure"
67
-
68
- t.string 'payload_units'
69
- t.string 'freight_units'
70
- t.string 'mail_units'
71
- t.string 'distance_units'
72
-
73
- t.datetime "created_at"
74
- t.datetime "updated_at"
75
-
76
- t.string "row_hash"
77
- end
78
- execute 'ALTER TABLE t100_flight_segments ADD PRIMARY KEY (row_hash);'
79
-
80
- create_table 'tapped_airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
81
- t.string 'i_am_just_here_to_get_in_the_way'
82
- end
83
-
84
- create_table 'airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
85
- t.string 'iata_code'
86
- t.string 'name'
87
- t.string 'city'
88
- t.string 'country_name'
89
- t.float 'latitude'
90
- t.float 'longitude'
91
- t.datetime 'created_at'
92
- t.datetime 'updated_at'
93
- end
94
- execute 'ALTER TABLE airports ADD PRIMARY KEY (iata_code);'
95
-
96
- create_table "countries", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
97
- t.string "iso_3166"
98
- t.string "name"
99
- t.datetime "created_at"
100
- t.datetime "updated_at"
101
- end
102
- execute "ALTER TABLE countries ADD PRIMARY KEY (iso_3166);"
103
-
104
- create_table "census_regions", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
105
- t.integer "number"
106
- t.string "name"
107
- t.datetime "updated_at"
108
- t.datetime "created_at"
109
- end
110
- execute "ALTER TABLE census_regions ADD PRIMARY KEY (number);"
111
-
112
- create_table 'census_divisions', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
113
- t.integer 'number'
114
- t.string 'name'
115
- t.datetime 'updated_at'
116
- t.datetime 'created_at'
117
- t.string 'census_region_name'
118
- t.integer 'census_region_number'
119
-
120
- end
121
- execute 'ALTER TABLE census_divisions ADD PRIMARY KEY (number);'
122
-
123
- create_table 'census_division_deux', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
124
- t.integer 'number'
125
- t.string 'name'
126
- t.datetime 'updated_at'
127
- t.datetime 'created_at'
128
- t.string 'census_region_name'
129
- t.integer 'census_region_number'
130
-
131
- end
132
- execute 'ALTER TABLE census_division_deux ADD PRIMARY KEY (number);'
133
-
134
- create_table 'crosscalling_census_divisions', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
135
- t.integer 'number'
136
- t.string 'name'
137
- t.datetime 'updated_at'
138
- t.datetime 'created_at'
139
- t.string 'census_region_name'
140
- t.integer 'census_region_number'
141
-
142
- end
143
- execute 'ALTER TABLE crosscalling_census_divisions ADD PRIMARY KEY (number);'
144
-
145
- create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
146
- t.float "fuel_efficiency_city"
147
- t.float "fuel_efficiency_highway"
148
- t.string "make_name"
149
- t.string "model_name"
150
- t.string "year"
151
- t.string "fuel_type_code"
152
- t.datetime "updated_at"
153
- t.datetime "created_at"
154
- t.string "transmission"
155
- t.string "drive"
156
- t.boolean "turbo"
157
- t.boolean "supercharger"
158
- t.integer "cylinders"
159
- t.float "displacement"
160
- t.float "raw_fuel_efficiency_city"
161
- t.float "raw_fuel_efficiency_highway"
162
- t.integer "carline_mfr_code"
163
- t.integer "vi_mfr_code"
164
- t.integer "carline_code"
165
- t.integer "carline_class_code"
166
- t.boolean "injection"
167
- t.string "carline_class_name"
168
- t.string "speeds"
169
-
170
- t.string 'raw_fuel_efficiency_highway_units'
171
- t.string 'raw_fuel_efficiency_city_units'
172
- t.string 'fuel_efficiency_highway_units'
173
- t.string 'fuel_efficiency_city_units'
174
-
175
- t.string "row_hash"
176
- end
177
- execute "ALTER TABLE automobile_variants ADD PRIMARY KEY (row_hash);"
178
-
179
- create_table "automobile_fuel_types", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
180
- t.string "name"
181
- t.datetime "created_at"
182
- t.datetime "updated_at"
183
- t.float "emission_factor"
184
- t.string "emission_factor_units"
185
- t.float "annual_distance"
186
- t.string "annual_distance_units"
187
- t.string "code"
188
- end
189
- execute "ALTER TABLE automobile_fuel_types ADD PRIMARY KEY (code);"
190
-
191
- create_table "residential_energy_consumption_survey_responses", :options => 'ENGINE=InnoDB default charset=utf8', :id => false, :force => true do |t|
192
- t.integer "department_of_energy_identifier"
193
-
194
- t.string "residence_class"
195
- t.date "construction_year"
196
- t.string "construction_period"
197
- t.string "urbanity"
198
- t.string "dishwasher_use"
199
- t.string "central_ac_use"
200
- t.string "window_ac_use"
201
- t.string "clothes_washer_use"
202
- t.string "clothes_dryer_use"
203
-
204
- t.integer "census_division_number"
205
- t.string "census_division_name"
206
- t.integer "census_region_number"
207
- t.string "census_region_name"
208
-
209
- t.float "rooms"
210
- t.float "floorspace"
211
- t.integer "residents"
212
- t.boolean "ownership"
213
- t.boolean "thermostat_programmability"
214
- t.integer "refrigerator_count"
215
- t.integer "freezer_count"
216
- t.float "annual_energy_from_fuel_oil_for_heating_space"
217
- t.float "annual_energy_from_fuel_oil_for_heating_water"
218
- t.float "annual_energy_from_fuel_oil_for_appliances"
219
- t.float "annual_energy_from_natural_gas_for_heating_space"
220
- t.float "annual_energy_from_natural_gas_for_heating_water"
221
- t.float "annual_energy_from_natural_gas_for_appliances"
222
- t.float "annual_energy_from_propane_for_heating_space"
223
- t.float "annual_energy_from_propane_for_heating_water"
224
- t.float "annual_energy_from_propane_for_appliances"
225
- t.float "annual_energy_from_wood"
226
- t.float "annual_energy_from_kerosene"
227
- t.float "annual_energy_from_electricity_for_clothes_driers"
228
- t.float "annual_energy_from_electricity_for_dishwashers"
229
- t.float "annual_energy_from_electricity_for_freezers"
230
- t.float "annual_energy_from_electricity_for_refrigerators"
231
- t.float "annual_energy_from_electricity_for_air_conditioners"
232
- t.float "annual_energy_from_electricity_for_heating_space"
233
- t.float "annual_energy_from_electricity_for_heating_water"
234
- t.float "annual_energy_from_electricity_for_other_appliances"
235
- t.float "weighting"
236
- t.float "lighting_use"
237
- t.float "lighting_efficiency"
238
- t.integer "heating_degree_days"
239
- t.integer "cooling_degree_days"
240
- t.integer "total_rooms"
241
- t.integer "bathrooms"
242
- t.integer "halfbaths"
243
- t.integer "heated_garage"
244
- t.integer "attached_1car_garage"
245
- t.integer "detached_1car_garage"
246
- t.integer "attached_2car_garage"
247
- t.integer "detached_2car_garage"
248
- t.integer "attached_3car_garage"
249
- t.integer "detached_3car_garage"
250
- t.integer "lights_on_1_to_4_hours"
251
- t.integer "efficient_lights_on_1_to_4_hours"
252
- t.integer "lights_on_4_to_12_hours"
253
- t.integer "efficient_lights_on_4_to_12_hours"
254
- t.integer "lights_on_over_12_hours"
255
- t.integer "efficient_lights_on_over_12_hours"
256
- t.integer "outdoor_all_night_lights"
257
- t.integer "outdoor_all_night_gas_lights"
258
-
259
- t.datetime "created_at"
260
- t.datetime "updated_at"
261
- end
262
- execute "ALTER TABLE residential_energy_consumption_survey_responses ADD PRIMARY KEY (department_of_energy_identifier);"
263
-
264
- create_table 'aircraft', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
265
- t.string 'icao_code'
266
- t.string 'manufacturer_name'
267
- t.string 'name'
268
-
269
- t.string "bts_name"
270
- t.string "bts_aircraft_type_code"
271
-
272
- t.string 'brighter_planet_aircraft_class_code'
273
- # t.float 'm3'
274
- # t.float 'm2'
275
- # t.float 'm1'
276
- # t.float 'endpoint_fuel'
277
- t.datetime 'updated_at'
278
- t.datetime 'created_at'
279
- end
280
- execute 'ALTER TABLE aircraft ADD PRIMARY KEY (icao_code);'
281
-
282
- create_table 'aircraft_deux', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
283
- t.string 'icao_code'
284
- t.string 'manufacturer_name'
285
- t.string 'name'
286
-
287
- t.string "bts_name"
288
- t.string "bts_aircraft_type_code"
289
-
290
- # t.string 'brighter_planet_aircraft_class_code'
291
- # t.float 'm3'
292
- # t.float 'm2'
293
- # t.float 'm1'
294
- # t.float 'endpoint_fuel'
295
- t.datetime 'updated_at'
296
- t.datetime 'created_at'
297
- end
298
- execute 'ALTER TABLE aircraft_deux ADD PRIMARY KEY (icao_code);'
299
- end
300
-
301
- DataMiner::Run.create_tables
302
- end
303
-
304
- def load_models
305
- load_schema
306
-
307
- Dir.glob(File.expand_path('*.rb', File.dirname(__FILE__))).each do |lib|
308
- require lib
309
- end
310
- end
311
- end