data_miner 1.3.8 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. data/CHANGELOG +42 -0
  2. data/Gemfile +19 -3
  3. data/README.rdoc +3 -3
  4. data/Rakefile +13 -15
  5. data/data_miner.gemspec +4 -15
  6. data/lib/data_miner.rb +69 -70
  7. data/lib/data_miner/active_record_extensions.rb +17 -22
  8. data/lib/data_miner/attribute.rb +176 -179
  9. data/lib/data_miner/dictionary.rb +38 -31
  10. data/lib/data_miner/run.rb +49 -18
  11. data/lib/data_miner/script.rb +116 -0
  12. data/lib/data_miner/step.rb +5 -0
  13. data/lib/data_miner/step/import.rb +74 -0
  14. data/lib/data_miner/step/process.rb +34 -0
  15. data/lib/data_miner/step/tap.rb +134 -0
  16. data/lib/data_miner/version.rb +1 -1
  17. data/test/helper.rb +26 -24
  18. data/test/support/breeds.xls +0 -0
  19. data/test/support/pet_color_dictionary.en.csv +5 -0
  20. data/test/support/pet_color_dictionary.es.csv +5 -0
  21. data/test/support/pets.csv +5 -0
  22. data/test/support/pets_funny.csv +4 -0
  23. data/test/test_data_miner.rb +103 -0
  24. data/test/test_earth_import.rb +25 -0
  25. data/test/test_earth_tap.rb +25 -0
  26. data/test/test_safety.rb +43 -0
  27. metadata +72 -78
  28. data/.document +0 -5
  29. data/lib/data_miner/config.rb +0 -124
  30. data/lib/data_miner/import.rb +0 -93
  31. data/lib/data_miner/process.rb +0 -38
  32. data/lib/data_miner/tap.rb +0 -143
  33. data/test/support/aircraft.rb +0 -102
  34. data/test/support/airport.rb +0 -16
  35. data/test/support/automobile_fuel_type.rb +0 -40
  36. data/test/support/automobile_variant.rb +0 -362
  37. data/test/support/country.rb +0 -15
  38. data/test/support/test_database.rb +0 -311
  39. data/test/test_data_miner_attribute.rb +0 -111
  40. data/test/test_data_miner_process.rb +0 -18
  41. data/test/test_old_syntax.rb +0 -825
  42. data/test/test_tap.rb +0 -21
@@ -1,16 +0,0 @@
1
- class Airport < ActiveRecord::Base
2
- set_primary_key :iata_code
3
-
4
- data_miner do
5
- import :url => 'https://openflights.svn.sourceforge.net/svnroot/openflights/openflights/data/airports.dat',
6
- :headers => false,
7
- :select => lambda { |row| row[4].present? } do
8
- key 'iata_code', :field_number => 4
9
- store 'name', :field_number => 1
10
- store 'city', :field_number => 2
11
- store 'country_name', :field_number => 3
12
- store 'latitude', :field_number => 6, :nullify => true
13
- store 'longitude', :field_number => 7, :nullify => true
14
- end
15
- end
16
- end
@@ -1,40 +0,0 @@
1
- class AutomobileFuelType < ActiveRecord::Base
2
- set_primary_key :code
3
-
4
- data_miner do
5
- import(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
6
- :filename => 'Gd6-dsc.txt',
7
- :format => :fixed_width,
8
- :crop => 21..26, # inclusive
9
- :cut => '2-',
10
- :select => lambda { |row| /\A[A-Z]/.match row[:code] },
11
- :schema => [[ 'code', 2, { :type => :string } ],
12
- [ 'spacer', 2 ],
13
- [ 'name', 52, { :type => :string } ]]) do
14
- key 'code'
15
- store 'name'
16
- end
17
-
18
- import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/models_export/automobile_fuel_type.csv' do
19
- key 'code'
20
- store 'name'
21
- store 'annual_distance'
22
- store 'emission_factor'
23
- end
24
-
25
- # pull electricity emission factor from residential electricity
26
- import(:url => 'http://spreadsheets.google.com/pub?key=rukxnmuhhsOsrztTrUaFCXQ',
27
- :select => lambda { |row| row['code'] == 'El' }) do
28
- key 'code'
29
- store 'name'
30
- store 'emission_factor'
31
- end
32
-
33
- # still need distance estimate for electric cars
34
- end
35
-
36
- CODES = {
37
- :electricity => 'El',
38
- :diesel => 'D'
39
- }
40
- end
@@ -1,362 +0,0 @@
1
- class AutomobileVariant < ActiveRecord::Base
2
- set_primary_key :row_hash
3
-
4
- module FuelEconomyGuide
5
- TRANSMISSIONS = {
6
- 'A' => 'automatic',
7
- 'M' => 'manual',
8
- 'L' => 'automatic', # Lockup/automatic
9
- 'S' => 'semiautomatic', # Semiautomatic
10
- 'C' => 'manual' # TODO verify for VW Syncro
11
- }
12
-
13
- ENGINE_TYPES = {
14
- '(GUZZLER)' => nil, # "gas guzzler"
15
- '(POLICE)' => nil, # police automobile_variant
16
- '(MPFI)' => 'injection',
17
- '(MPI*)' => 'injection',
18
- '(SPFI)' => 'injection',
19
- '(FFS)' => 'injection',
20
- '(TURBO)' => 'turbo',
21
- '(TRBO)' => 'turbo',
22
- '(TC*)' => 'turbo',
23
- '(FFS,TRBO)' => %w(injection turbo),
24
- '(S-CHARGE)' => 'supercharger',
25
- '(SC*)' => 'supercharger',
26
- '(DIESEL)' => nil, # diesel
27
- '(DSL)' => nil, # diesel
28
- '(ROTARY)' => nil, # rotary
29
- '(VARIABLE)' => nil, # variable displacement
30
- '(NO-CAT)' => nil, # no catalytic converter
31
- '(OHC)' => nil, # overhead camshaft
32
- '(OHV)' => nil, # overhead valves
33
- '(16-VALVE)' => nil, # 16V
34
- '(305)' => nil, # 305 cubic inch displacement
35
- '(307)' => nil, # 307 cubic inch displacement
36
- '(M-ENG)' => nil,
37
- '(W-ENG)' => nil,
38
- '(GM-BUICK)' => nil,
39
- '(GM-CHEV)' => nil,
40
- '(GM-OLDS)' => nil,
41
- '(GM-PONT)' => nil,
42
- }
43
-
44
- class ParserB
45
- require 'fixed_width'
46
- ::FixedWidth.define :fuel_economy_guide_b do |d|
47
- d.rows do |row|
48
- row.trap { true } # there's only one section
49
- row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR
50
- row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA
51
- row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE
52
- row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE
53
- row.column 'carline_name' , 28, :type => :string # CARLINE NAME
54
- row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES
55
- row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
56
- row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE
57
- row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS
58
- row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
59
- row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
60
- row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE
61
- row.column 'carline_code' , 5, :type => :integer # CARLINE CODE
62
- row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX
63
- row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME
64
- row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
65
- row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
66
- row.spacer 2
67
- row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
68
- row.spacer 2
69
- row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON
70
- row.spacer 2
71
- row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON
72
- row.spacer 2
73
- row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
74
- row.spacer 2
75
- row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
76
- row.spacer 2
77
- row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
78
- row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT
79
- row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1
80
- row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2
81
- row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3
82
- row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
83
- row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
84
- row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
85
- row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY
86
- row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
87
- row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
88
- row.column 'filler' , 1, :type => :string # NOT USED
89
- row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
90
- row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS
91
- end
92
- end
93
- attr_accessor :year
94
- def initialize(options = {})
95
- options = options.stringify_keys
96
- @year = options['year']
97
- end
98
-
99
- def apply(row)
100
- row.merge!({
101
- 'make' => row['carline_mfr_name'], # make it line up with the errata
102
- 'model' => row['carline_name'], # ditto
103
- 'transmission' => TRANSMISSIONS[row['model_trans'][0, 1]],
104
- 'speeds' => (row['model_trans'][1, 1] == 'V') ? 'variable' : row['model_trans'][1, 1],
105
- 'turbo' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('turbo'),
106
- 'supercharger' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('supercharger'),
107
- 'injection' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('injection'),
108
- 'displacement' => _displacement(row['opt_disp']),
109
- 'year' => year
110
- })
111
- row
112
- end
113
-
114
- def _displacement(str)
115
- str = str.gsub(/[\(\)]/, '').strip
116
- if str =~ /^(.+)L$/
117
- $1.to_f
118
- elsif str =~ /^(.+)CC$/
119
- $1.to_f / 1000
120
- end
121
- end
122
-
123
- end
124
- class ParserC
125
- attr_accessor :year
126
- def initialize(options = {})
127
- options = options.stringify_keys
128
- @year = options['year']
129
- end
130
-
131
- def apply(row)
132
- row.merge!({
133
- 'make' => row['Manufacturer'], # make it line up with the errata
134
- 'model' => row['carline name'], # ditto
135
- 'drive' => row['drv'] + 'WD',
136
- 'transmission' => TRANSMISSIONS[row['trans'][-3, 1]],
137
- 'speeds' => (row['trans'][-2, 1] == 'V') ? 'variable' : row['trans'][-2, 1],
138
- 'turbo' => row['T'] == 'T',
139
- 'supercharger' => row['S'] == 'S',
140
- 'injection' => true,
141
- 'year' => year
142
- })
143
- row
144
- end
145
- end
146
- class ParserD
147
- attr_accessor :year
148
- def initialize(options = {})
149
- options = options.stringify_keys
150
- @year = options['year']
151
- end
152
-
153
- def apply(row)
154
- row.merge!({
155
- 'make' => row['MFR'], # make it line up with the errata
156
- 'model' => row['CAR LINE'], # ditto
157
- 'drive' => row['DRIVE SYS'] + 'WD',
158
- 'transmission' => TRANSMISSIONS[row['TRANS'][-3, 1]],
159
- 'speeds' => (row['TRANS'][-2, 1] == 'V') ? 'variable' : row['TRANS'][-2, 1],
160
- 'turbo' => row['TURBO'] == 'T',
161
- 'supercharger' => row['SPCHGR'] == 'S',
162
- 'injection' => true,
163
- 'year' => year
164
- })
165
- row
166
- end
167
- end
168
- end
169
-
170
- class Guru
171
- # the following matching methods are needed by the errata
172
- # per https://brighterplanet.sifterapp.com/projects/30/issues/750/comments
173
-
174
- def transmission_is_blank?(row)
175
- row['transmission'].blank?
176
- end
177
-
178
- def is_a_2007_gmc_or_chevrolet?(row)
179
- row['year'] == 2007 and %w(GMC CHEVROLET).include? row['MFR'].upcase
180
- end
181
-
182
- def is_a_porsche?(row)
183
- row['make'].upcase == 'PORSCHE'
184
- end
185
-
186
- def is_not_a_porsche?(row)
187
- !is_a_porsche? row
188
- end
189
-
190
- def is_a_mercedes_benz?(row)
191
- row['make'] =~ /MERCEDES/i
192
- end
193
-
194
- def is_a_lexus?(row)
195
- row['make'].upcase == 'LEXUS'
196
- end
197
-
198
- def is_a_bmw?(row)
199
- row['make'].upcase == 'BMW'
200
- end
201
-
202
- def is_a_ford?(row)
203
- row['make'].upcase == 'FORD'
204
- end
205
-
206
- def is_a_rolls_royce_and_model_contains_bentley?(row)
207
- is_a_rolls_royce?(row) and model_contains_bentley?(row)
208
- end
209
-
210
- def is_a_bentley?(row)
211
- row['make'].upcase == 'BENTLEY'
212
- end
213
-
214
- def is_a_rolls_royce?(row)
215
- row['make'] =~ /ROLLS/i
216
- end
217
-
218
- def is_a_turbo_brooklands?(row)
219
- row['model'] =~ /TURBO R\/RL BKLDS/i
220
- end
221
-
222
- def model_contains_maybach?(row)
223
- row['model'] =~ /MAYBACH/i
224
- end
225
-
226
- def model_contains_bentley?(row)
227
- row['model'] =~ /BENTLEY/i
228
- end
229
- end
230
-
231
- errata = { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv', :responder => 'AutomobileVariant::Guru' }
232
-
233
- data_miner do
234
- # 1985---1997
235
- (85..97).each do |yy|
236
- filename = (yy == 96) ? "#{yy}MFGUI.ASC" : "#{yy}MFGUI.DAT"
237
- import(:url => "http://www.fueleconomy.gov/FEG/epadata/#{yy}mfgui.zip",
238
- :filename => filename,
239
- :transform => { :class => FuelEconomyGuide::ParserB, :year => "19#{yy}".to_i },
240
- :format => :fixed_width,
241
- :cut => (yy == 95) ? '13-' : nil,
242
- :schema_name => :fuel_economy_guide_b,
243
- :select => lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' },
244
- :errata => errata) do
245
- key 'row_hash'
246
- store 'make_name', :field_name => 'make'
247
- store 'model_name', :field_name => 'model'
248
- store 'year'
249
- store 'fuel_type_code', :field_name => 'fuel_type'
250
- store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
251
- store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
252
- store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
253
- store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
254
- store 'cylinders', :field_name => 'no_cyc'
255
- store 'drive', :field_name => 'drive_system'
256
- store 'carline_mfr_code'
257
- store 'vi_mfr_code'
258
- store 'carline_code'
259
- store 'carline_class_code', :field_name => 'carline_clss'
260
- store 'transmission'
261
- store 'speeds'
262
- store 'turbo'
263
- store 'supercharger'
264
- store 'injection'
265
- store 'displacement'
266
- end
267
- end
268
-
269
- # 1998--2005
270
- {
271
- 1998 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv' },
272
- 1999 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/99guide.zip', :filename => '99guide6.csv' },
273
- 2000 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls' },
274
- 2001 => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/01guide0918.csv' }, # parseexcel 0.5.2 can't read Excel 5.0 { :url => 'http://www.fueleconomy.gov/FEG/epadata/01data.zip', :filename => '01guide0918.xls' }
275
- 2002 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls' },
276
- 2003 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/03data.zip', :filename => 'guide_2003_feb04-03b.csv' },
277
- 2004 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/04data.zip', :filename => 'gd04-Feb1804-RelDtFeb20.csv' },
278
- 2005 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/05data.zip', :filename => 'guide2005-2004oct15.csv' }
279
- }.sort { |a, b| a.first <=> b.first }.each do |year, options|
280
- import options.merge(:transform => { :class => FuelEconomyGuide::ParserC, :year => year },
281
- :errata => errata) do
282
- key 'row_hash'
283
- store 'make_name', :field_name => 'make'
284
- store 'model_name', :field_name => 'model'
285
- store 'fuel_type_code', :field_name => 'fl'
286
- store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
287
- store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
288
- store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
289
- store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
290
- store 'cylinders', :field_name => 'cyl'
291
- store 'displacement', :field_name => 'displ'
292
- store 'carline_class_code', :field_name => 'cls' if year >= 2000
293
- store 'carline_class_name', :field_name => 'Class'
294
- store 'year'
295
- store 'transmission'
296
- store 'speeds'
297
- store 'turbo'
298
- store 'supercharger'
299
- store 'injection'
300
- store 'drive'
301
- end
302
- end
303
-
304
- # 2006--2010
305
- {
306
- 2006 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/06data.zip', :filename => '2006_FE_Guide_14-Nov-2005_download.csv' },
307
- 2007 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/07data.zip', :filename => '2007_FE_guide_ALL_no_sales_May_01_2007.xls' },
308
- 2008 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv' },
309
- 2009 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/09data.zip', :filename => '2009_FE_guide for DOE_ALL-rel dates-no-sales-8-28-08download.csv' },
310
- # 2010 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/10data.zip', :filename => '2010FEguide-for DOE-rel dates before 10-16-09-no-sales10-8-09public.xls' }
311
- }.sort { |a, b| a.first <=> b.first }.each do |year, options|
312
- import options.merge(:transform => { :class => FuelEconomyGuide::ParserD, :year => year },
313
- :reject => (year == 2007) ? lambda { |row| row.values.first.blank? } : nil,
314
- :errata => errata) do
315
- key 'row_hash'
316
- store 'make_name', :field_name => 'make'
317
- store 'model_name', :field_name => 'model'
318
- store 'fuel_type_code', :field_name => 'FUEL TYPE'
319
- store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
320
- store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
321
- store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
322
- store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
323
- store 'cylinders', :field_name => 'NUMB CYL'
324
- store 'displacement', :field_name => 'DISPLACEMENT'
325
- store 'carline_class_code', :field_name => 'CLS'
326
- store 'carline_class_name', :field_name => 'CLASS'
327
- store 'year'
328
- store 'transmission'
329
- store 'speeds'
330
- store 'turbo'
331
- store 'supercharger'
332
- store 'injection'
333
- store 'drive'
334
- end
335
- end
336
-
337
- # associate :make, :key => :original_automobile_make_name, :foreign_key => :name
338
- # derive :automobile_model_id # creates models by name
339
- # associate :fuel_type, :key => :original_automobile_fuel_type_code, :foreign_key => :code
340
-
341
- process 'Set adjusted fuel economy' do
342
- update_all 'fuel_efficiency_city = 1 / ((0.003259 / 0.425143707) + (1.1805 / raw_fuel_efficiency_city))'
343
- update_all 'fuel_efficiency_highway = 1 / ((0.001376 / 0.425143707) + (1.3466 / raw_fuel_efficiency_highway))'
344
- end
345
- end
346
-
347
- def name
348
- extra = []
349
- extra << "V#{cylinders}" if cylinders
350
- extra << "#{displacement}L" if displacement
351
- extra << "turbo" if turbo
352
- extra << "FI" if injection
353
- extra << "#{speeds}spd" if speeds.present?
354
- extra << transmission if transmission.present?
355
- extra << "(#{fuel_type.name})" if fuel_type
356
- extra.join(' ')
357
- end
358
-
359
- def fuel_economy_description
360
- [ fuel_efficiency_city, fuel_efficiency_highway ].map { |f| f.kilometres_per_litre.to(:miles_per_gallon).round }.join('/')
361
- end
362
- end
@@ -1,15 +0,0 @@
1
- class Country < ActiveRecord::Base
2
- set_primary_key :iso_3166
3
-
4
- data_miner do
5
- import 'The official ISO country list', :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :encoding => 'ISO-8859-1', :skip => 2, :headers => false, :delimiter => ';' do
6
- key 'iso_3166', :field_number => 1
7
- store 'name', :field_number => 0
8
- end
9
-
10
- import 'A Princeton dataset with better capitalization', :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do
11
- key 'iso_3166', :field_name => 'country code'
12
- store 'name', :field_name => 'country'
13
- end
14
- end
15
- end
@@ -1,311 +0,0 @@
1
- module TestDatabase
2
- extend self
3
-
4
- def connect
5
- @connection ||= ActiveRecord::Base.establish_connection(
6
- 'adapter' => 'mysql',
7
- 'database' => 'data_miner_test',
8
- 'username' => 'root',
9
- 'password' => 'password'
10
- )
11
- end
12
-
13
- def load_schema
14
- connect
15
-
16
- ActiveRecord::Schema.define(:version => 20090819143429) do
17
- create_table "t100_flight_segments", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
18
- t.integer "departures_performed"
19
- t.integer "payload"
20
- t.integer "seats"
21
- t.integer "passengers"
22
- t.integer "freight"
23
- t.integer "mail"
24
- t.integer "ramp_to_ramp"
25
- t.integer "air_time"
26
- t.float "load_factor"
27
- t.float "freight_share"
28
- t.integer "distance"
29
- t.integer "departures_scheduled"
30
- t.string "unique_carrier"
31
- t.integer "dot_airline_id"
32
- t.string "unique_carrier_name"
33
- t.string "unique_carrier_entity"
34
- t.string "region"
35
- t.string "carrier"
36
- t.string "carrier_name"
37
- t.integer "carrier_group"
38
- t.integer "carrier_group_new"
39
- t.string "origin_airport_iata"
40
- t.string "origin_city_name"
41
- t.integer "origin_city_num"
42
- t.string "origin_state_abr"
43
- t.string "origin_state_fips"
44
- t.string "origin_state_nm"
45
- t.string "origin_country_iso_3166"
46
- t.string "origin_country_name"
47
- t.integer "origin_wac"
48
- t.string "dest_airport_iata"
49
- t.string "dest_city_name"
50
- t.integer "dest_city_num"
51
- t.string "dest_state_abr"
52
- t.string "dest_state_fips"
53
- t.string "dest_state_nm"
54
- t.string "dest_country_iso_3166"
55
- t.string "dest_country_name"
56
- t.integer "dest_wac"
57
- t.integer "bts_aircraft_group"
58
- t.integer "bts_aircraft_type"
59
- t.integer "bts_aircraft_config"
60
- t.integer "year"
61
- t.integer "quarter"
62
- t.integer "month"
63
- t.integer "bts_distance_group"
64
- t.string "bts_service_class"
65
- t.string "data_source"
66
- t.float "seats_per_departure"
67
-
68
- t.string 'payload_units'
69
- t.string 'freight_units'
70
- t.string 'mail_units'
71
- t.string 'distance_units'
72
-
73
- t.datetime "created_at"
74
- t.datetime "updated_at"
75
-
76
- t.string "row_hash"
77
- end
78
- execute 'ALTER TABLE t100_flight_segments ADD PRIMARY KEY (row_hash);'
79
-
80
- create_table 'tapped_airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
81
- t.string 'i_am_just_here_to_get_in_the_way'
82
- end
83
-
84
- create_table 'airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
85
- t.string 'iata_code'
86
- t.string 'name'
87
- t.string 'city'
88
- t.string 'country_name'
89
- t.float 'latitude'
90
- t.float 'longitude'
91
- t.datetime 'created_at'
92
- t.datetime 'updated_at'
93
- end
94
- execute 'ALTER TABLE airports ADD PRIMARY KEY (iata_code);'
95
-
96
- create_table "countries", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
97
- t.string "iso_3166"
98
- t.string "name"
99
- t.datetime "created_at"
100
- t.datetime "updated_at"
101
- end
102
- execute "ALTER TABLE countries ADD PRIMARY KEY (iso_3166);"
103
-
104
- create_table "census_regions", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
105
- t.integer "number"
106
- t.string "name"
107
- t.datetime "updated_at"
108
- t.datetime "created_at"
109
- end
110
- execute "ALTER TABLE census_regions ADD PRIMARY KEY (number);"
111
-
112
- create_table 'census_divisions', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
113
- t.integer 'number'
114
- t.string 'name'
115
- t.datetime 'updated_at'
116
- t.datetime 'created_at'
117
- t.string 'census_region_name'
118
- t.integer 'census_region_number'
119
-
120
- end
121
- execute 'ALTER TABLE census_divisions ADD PRIMARY KEY (number);'
122
-
123
- create_table 'census_division_deux', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
124
- t.integer 'number'
125
- t.string 'name'
126
- t.datetime 'updated_at'
127
- t.datetime 'created_at'
128
- t.string 'census_region_name'
129
- t.integer 'census_region_number'
130
-
131
- end
132
- execute 'ALTER TABLE census_division_deux ADD PRIMARY KEY (number);'
133
-
134
- create_table 'crosscalling_census_divisions', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
135
- t.integer 'number'
136
- t.string 'name'
137
- t.datetime 'updated_at'
138
- t.datetime 'created_at'
139
- t.string 'census_region_name'
140
- t.integer 'census_region_number'
141
-
142
- end
143
- execute 'ALTER TABLE crosscalling_census_divisions ADD PRIMARY KEY (number);'
144
-
145
- create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
146
- t.float "fuel_efficiency_city"
147
- t.float "fuel_efficiency_highway"
148
- t.string "make_name"
149
- t.string "model_name"
150
- t.string "year"
151
- t.string "fuel_type_code"
152
- t.datetime "updated_at"
153
- t.datetime "created_at"
154
- t.string "transmission"
155
- t.string "drive"
156
- t.boolean "turbo"
157
- t.boolean "supercharger"
158
- t.integer "cylinders"
159
- t.float "displacement"
160
- t.float "raw_fuel_efficiency_city"
161
- t.float "raw_fuel_efficiency_highway"
162
- t.integer "carline_mfr_code"
163
- t.integer "vi_mfr_code"
164
- t.integer "carline_code"
165
- t.integer "carline_class_code"
166
- t.boolean "injection"
167
- t.string "carline_class_name"
168
- t.string "speeds"
169
-
170
- t.string 'raw_fuel_efficiency_highway_units'
171
- t.string 'raw_fuel_efficiency_city_units'
172
- t.string 'fuel_efficiency_highway_units'
173
- t.string 'fuel_efficiency_city_units'
174
-
175
- t.string "row_hash"
176
- end
177
- execute "ALTER TABLE automobile_variants ADD PRIMARY KEY (row_hash);"
178
-
179
- create_table "automobile_fuel_types", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
180
- t.string "name"
181
- t.datetime "created_at"
182
- t.datetime "updated_at"
183
- t.float "emission_factor"
184
- t.string "emission_factor_units"
185
- t.float "annual_distance"
186
- t.string "annual_distance_units"
187
- t.string "code"
188
- end
189
- execute "ALTER TABLE automobile_fuel_types ADD PRIMARY KEY (code);"
190
-
191
- create_table "residential_energy_consumption_survey_responses", :options => 'ENGINE=InnoDB default charset=utf8', :id => false, :force => true do |t|
192
- t.integer "department_of_energy_identifier"
193
-
194
- t.string "residence_class"
195
- t.date "construction_year"
196
- t.string "construction_period"
197
- t.string "urbanity"
198
- t.string "dishwasher_use"
199
- t.string "central_ac_use"
200
- t.string "window_ac_use"
201
- t.string "clothes_washer_use"
202
- t.string "clothes_dryer_use"
203
-
204
- t.integer "census_division_number"
205
- t.string "census_division_name"
206
- t.integer "census_region_number"
207
- t.string "census_region_name"
208
-
209
- t.float "rooms"
210
- t.float "floorspace"
211
- t.integer "residents"
212
- t.boolean "ownership"
213
- t.boolean "thermostat_programmability"
214
- t.integer "refrigerator_count"
215
- t.integer "freezer_count"
216
- t.float "annual_energy_from_fuel_oil_for_heating_space"
217
- t.float "annual_energy_from_fuel_oil_for_heating_water"
218
- t.float "annual_energy_from_fuel_oil_for_appliances"
219
- t.float "annual_energy_from_natural_gas_for_heating_space"
220
- t.float "annual_energy_from_natural_gas_for_heating_water"
221
- t.float "annual_energy_from_natural_gas_for_appliances"
222
- t.float "annual_energy_from_propane_for_heating_space"
223
- t.float "annual_energy_from_propane_for_heating_water"
224
- t.float "annual_energy_from_propane_for_appliances"
225
- t.float "annual_energy_from_wood"
226
- t.float "annual_energy_from_kerosene"
227
- t.float "annual_energy_from_electricity_for_clothes_driers"
228
- t.float "annual_energy_from_electricity_for_dishwashers"
229
- t.float "annual_energy_from_electricity_for_freezers"
230
- t.float "annual_energy_from_electricity_for_refrigerators"
231
- t.float "annual_energy_from_electricity_for_air_conditioners"
232
- t.float "annual_energy_from_electricity_for_heating_space"
233
- t.float "annual_energy_from_electricity_for_heating_water"
234
- t.float "annual_energy_from_electricity_for_other_appliances"
235
- t.float "weighting"
236
- t.float "lighting_use"
237
- t.float "lighting_efficiency"
238
- t.integer "heating_degree_days"
239
- t.integer "cooling_degree_days"
240
- t.integer "total_rooms"
241
- t.integer "bathrooms"
242
- t.integer "halfbaths"
243
- t.integer "heated_garage"
244
- t.integer "attached_1car_garage"
245
- t.integer "detached_1car_garage"
246
- t.integer "attached_2car_garage"
247
- t.integer "detached_2car_garage"
248
- t.integer "attached_3car_garage"
249
- t.integer "detached_3car_garage"
250
- t.integer "lights_on_1_to_4_hours"
251
- t.integer "efficient_lights_on_1_to_4_hours"
252
- t.integer "lights_on_4_to_12_hours"
253
- t.integer "efficient_lights_on_4_to_12_hours"
254
- t.integer "lights_on_over_12_hours"
255
- t.integer "efficient_lights_on_over_12_hours"
256
- t.integer "outdoor_all_night_lights"
257
- t.integer "outdoor_all_night_gas_lights"
258
-
259
- t.datetime "created_at"
260
- t.datetime "updated_at"
261
- end
262
- execute "ALTER TABLE residential_energy_consumption_survey_responses ADD PRIMARY KEY (department_of_energy_identifier);"
263
-
264
- create_table 'aircraft', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
265
- t.string 'icao_code'
266
- t.string 'manufacturer_name'
267
- t.string 'name'
268
-
269
- t.string "bts_name"
270
- t.string "bts_aircraft_type_code"
271
-
272
- t.string 'brighter_planet_aircraft_class_code'
273
- # t.float 'm3'
274
- # t.float 'm2'
275
- # t.float 'm1'
276
- # t.float 'endpoint_fuel'
277
- t.datetime 'updated_at'
278
- t.datetime 'created_at'
279
- end
280
- execute 'ALTER TABLE aircraft ADD PRIMARY KEY (icao_code);'
281
-
282
- create_table 'aircraft_deux', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
283
- t.string 'icao_code'
284
- t.string 'manufacturer_name'
285
- t.string 'name'
286
-
287
- t.string "bts_name"
288
- t.string "bts_aircraft_type_code"
289
-
290
- # t.string 'brighter_planet_aircraft_class_code'
291
- # t.float 'm3'
292
- # t.float 'm2'
293
- # t.float 'm1'
294
- # t.float 'endpoint_fuel'
295
- t.datetime 'updated_at'
296
- t.datetime 'created_at'
297
- end
298
- execute 'ALTER TABLE aircraft_deux ADD PRIMARY KEY (icao_code);'
299
- end
300
-
301
- DataMiner::Run.create_tables
302
- end
303
-
304
- def load_models
305
- load_schema
306
-
307
- Dir.glob(File.expand_path('*.rb', File.dirname(__FILE__))).each do |lib|
308
- require lib
309
- end
310
- end
311
- end