earth 0.4.12 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. data/.gitignore +1 -0
  2. data/Gemfile +1 -1
  3. data/earth.gemspec +7 -3
  4. data/features/automobile_make_model_year_variant.feature +10 -10
  5. data/features/step_definitions/data_steps.rb +1 -1
  6. data/features/support/env.rb +2 -2
  7. data/features/support/imports/automobile_fuel_good.csv +3 -3
  8. data/features/support/imports/automobile_make_model_year_variant_bad.csv +2 -2
  9. data/features/support/imports/automobile_make_model_year_variant_good.csv +2 -2
  10. data/lib/earth/air.rb +2 -1
  11. data/lib/earth/air/aircraft.rb +21 -8
  12. data/lib/earth/air/aircraft/data_miner.rb +85 -180
  13. data/lib/earth/air/aircraft_class.rb +9 -1
  14. data/lib/earth/air/aircraft_class/data_miner.rb +57 -20
  15. data/lib/earth/air/aircraft_fuel_use_equation.rb +26 -0
  16. data/lib/earth/air/aircraft_fuel_use_equation/data_miner.rb +26 -0
  17. data/lib/earth/air/aircraft_manufacturer.rb +0 -2
  18. data/lib/earth/air/aircraft_manufacturer/data_miner.rb +4 -1
  19. data/lib/earth/air/airline.rb +2 -11
  20. data/lib/earth/air/airline/data_miner.rb +10 -51
  21. data/lib/earth/air/airport.rb +2 -29
  22. data/lib/earth/air/airport/data_miner.rb +61 -56
  23. data/lib/earth/air/bts_aircraft.rb +7 -0
  24. data/lib/earth/air/bts_aircraft/data_miner.rb +15 -0
  25. data/lib/earth/air/data_miner.rb +2 -1
  26. data/lib/earth/air/flight_seat_class.rb +1 -4
  27. data/lib/earth/air/flight_segment.rb +16 -10
  28. data/lib/earth/air/flight_segment/data_miner.rb +170 -129
  29. data/lib/earth/automobile/automobile_make/data_miner.rb +12 -3
  30. data/lib/earth/automobile/automobile_make_model/data_miner.rb +4 -1
  31. data/lib/earth/automobile/automobile_make_model_year/data_miner.rb +4 -1
  32. data/lib/earth/automobile/automobile_make_model_year_variant/data_miner.rb +29 -16
  33. data/lib/earth/automobile/automobile_make_year/data_miner.rb +8 -2
  34. data/lib/earth/automobile/automobile_size_class/data_miner.rb +4 -1
  35. data/lib/earth/automobile/automobile_type_fuel_age/data_miner.rb +4 -1
  36. data/lib/earth/automobile/automobile_type_fuel_year/data_miner.rb +4 -2
  37. data/lib/earth/automobile/automobile_type_fuel_year_age/data_miner.rb +4 -1
  38. data/lib/earth/automobile/automobile_type_year/data_miner.rb +4 -1
  39. data/lib/earth/fuel/fuel/data_miner.rb +4 -1
  40. data/lib/earth/hospitality/lodging_class.rb +0 -16
  41. data/lib/earth/hospitality/lodging_class/data_miner.rb +11 -1
  42. data/lib/earth/inflectors.rb +1 -1
  43. data/lib/earth/locality/country/data_miner.rb +14 -14
  44. data/lib/earth/locality/urbanity/data_miner.rb +4 -1
  45. data/lib/earth/residence/air_conditioner_use/data_miner.rb +4 -1
  46. data/lib/earth/residence/clothes_machine_use/data_miner.rb +4 -1
  47. data/lib/earth/residence/dishwasher_use/data_miner.rb +4 -1
  48. data/lib/earth/residence/residence_appliance/data_miner.rb +5 -2
  49. data/lib/earth/residence/residence_class/data_miner.rb +4 -1
  50. data/lib/earth/version.rb +1 -1
  51. data/spec/earth_spec.rb +2 -2
  52. data/test/test_aircraft_match.rb +732 -0
  53. metadata +87 -51
  54. data/features/flight_segment.feature +0 -14
  55. data/features/support/imports/flight_segment_bad.csv +0 -8
  56. data/features/support/imports/flight_segment_good.csv +0 -8
  57. data/lib/earth/air/flight_fuel_type.rb +0 -12
  58. data/lib/earth/air/flight_fuel_type/data_miner.rb +0 -12
@@ -0,0 +1,7 @@
1
+ class BtsAircraft < ActiveRecord::Base
2
+ set_primary_key :bts_code
3
+
4
+ data_miner do
5
+ tap "Brighter Planet's BTS Aircraft data", Earth.taps_server
6
+ end
7
+ end
@@ -0,0 +1,15 @@
1
+ BtsAircraft.class_eval do
2
+ data_miner do
3
+ schema Earth.database_options do
4
+ string 'bts_code'
5
+ string 'description'
6
+ end
7
+
8
+ import "the BTS aircraft type lookup table",
9
+ :url => "http://www.transtats.bts.gov/Download_Lookup.asp?Lookup=L_AIRCRAFT_TYPE",
10
+ :errata => { :url => 'https://spreadsheets.google.com/spreadsheet/pub?key=0AoQJbWqPrREqdEZ2d3JQMzV5T1o1T3JmVlFyNUZxdEE&output=csv' } do
11
+ key 'bts_code', :field_name => 'Code'
12
+ store 'description', :field_name => 'Description'
13
+ end
14
+ end
15
+ end
@@ -2,10 +2,11 @@ require 'earth/locality'
2
2
  require 'earth/locality/data_miner'
3
3
  require 'earth/air/aircraft/data_miner'
4
4
  require 'earth/air/aircraft_class/data_miner'
5
+ require 'earth/air/aircraft_fuel_use_equation/data_miner'
5
6
  require 'earth/air/aircraft_manufacturer/data_miner'
6
7
  require 'earth/air/airline/data_miner'
7
8
  require 'earth/air/airport/data_miner'
9
+ require 'earth/air/bts_aircraft/data_miner'
8
10
  require 'earth/air/flight_distance_class/data_miner'
9
- require 'earth/air/flight_fuel_type/data_miner'
10
11
  require 'earth/air/flight_seat_class/data_miner'
11
12
  require 'earth/air/flight_segment/data_miner'
@@ -1,11 +1,8 @@
1
1
  class FlightSeatClass < ActiveRecord::Base
2
2
  set_primary_key :name
3
- # has_many :airline_seat_classes, :class_name => 'AirlineSeatClass'
4
- # has_many :aircraft_seat_classes, :class_name => 'AircraftSeatClass'
5
- # has_many :airline_aircraft_seat_classes, :class_name => 'AirlineAircraftSeatClass'
6
3
 
7
4
  falls_back_on :multiplier => 1
8
-
5
+
9
6
  data_miner do
10
7
  tap "Brighter Planet's sanitized flight seat class data", Earth.taps_server
11
8
  end
@@ -1,23 +1,29 @@
1
+ # need this for association with Aircraft through loose_tight_dictionary_cached_results
2
+ require 'loose_tight_dictionary/cached_result'
3
+
1
4
  class FlightSegment < ActiveRecord::Base
2
5
  set_primary_key :row_hash
3
6
 
4
7
  extend CohortScope
5
8
  self.minimum_cohort_size = 1
6
9
 
7
- belongs_to :airline, :foreign_key => 'airline_iata_code', :primary_key => 'iata_code'
8
- belongs_to :origin_airport, :foreign_key => 'origin_airport_iata_code', :primary_key => 'iata_code', :class_name => 'Airport'
9
- belongs_to :destination_airport, :foreign_key => 'destination_airport_iata_code', :primary_key => 'iata_code', :class_name => 'Airport'
10
- belongs_to :aircraft, :foreign_key => 'aircraft_bts_code', :primary_key => 'bts_code'
10
+ # If airport iata code is missing, associate with all airports in a city
11
+ # We need this to calculate distance when importing ICAO segments - see cm1 flight_segment.rb
12
+ has_many :origin_city_airports, :foreign_key => 'city', :primary_key => 'origin_airport_city', :class_name => 'Airport'
13
+ has_many :destination_city_airports, :foreign_key => 'city', :primary_key => 'destination_airport_city', :class_name => 'Airport'
11
14
 
12
- falls_back_on :distance => lambda { weighted_average(:distance, :weighted_by => :passengers) }, # 2077.1205 data1 10-12-2010
13
- :seats => lambda { weighted_average(:seats, :weighted_by => :passengers) }, # 144.15653537046 data1 10-12-2010
14
- :load_factor => lambda { weighted_average(:load_factor, :weighted_by => :passengers) }, # 0.78073233770097 data1 10-12-2010
15
- :freight_share => lambda { weighted_average(:freight_share, :weighted_by => :passengers) } # 0.022567224170157 data1 10-12-2010
15
+ # Enable flight_segment.aircraft
16
+ cache_loose_tight_dictionary_matches_with :aircraft, :primary_key => :aircraft_description, :foreign_key => :description
17
+
18
+ falls_back_on :distance => lambda { weighted_average(:distance, :weighted_by => :passengers) }, # 2077.1205 data1 10-12-2010
19
+ :seats_per_flight => lambda { weighted_average(:seats_per_flight, :weighted_by => :passengers) }, # 144.15653537046 data1 10-12-2010
20
+ :load_factor => lambda { weighted_average(:load_factor, :weighted_by => :passengers) }, # 0.78073233770097 data1 10-12-2010
21
+ :freight_share => lambda { weighted_average(:freight_share, :weighted_by => :passengers) } # 0.022567224170157 data1 10-12-2010
16
22
 
17
23
  data_miner do
18
- tap "Brighter Planet's sanitized T100 data", Earth.taps_server
24
+ tap "Brighter Planet's sanitized flight segment data", Earth.taps_server
19
25
 
20
- process "pull dependencies" do
26
+ process "Pull dependencies" do
21
27
  run_data_miner_on_belongs_to_associations
22
28
  end
23
29
  end
@@ -1,3 +1,6 @@
1
+ # need this to run flight_segment.cache_aircraft!
2
+ require 'loose_tight_dictionary/cached_result'
3
+
1
4
  FlightSegment.class_eval do
2
5
  URL = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
3
6
  FORM_DATA = %{
@@ -157,76 +160,54 @@ FlightSegment.class_eval do
157
160
  VarDesc=DataSource&
158
161
  VarType=Char
159
162
  }.gsub /[\s]+/,''
160
-
163
+
161
164
  data_miner do
162
165
  schema Earth.database_options do
163
- string 'row_hash'
164
- integer 'departures_performed'
165
- integer 'passengers'
166
- integer 'total_seats'
167
- float 'payload' # theoretical max freight + mail + passengers; needs to be a float because the import includes a units conversion
168
- string 'payload_units'
169
- float 'freight' # this needs to be a float because the import includes a units conversion
170
- string 'freight_units'
171
- float 'mail' # this needs to be a float because the import includes a units conversion
172
- string 'mail_units'
173
- float 'load_factor'
174
- float 'freight_share'
175
- float 'seats'
176
- float 'distance'
177
- string 'distance_units'
178
- string 'aircraft_bts_code'
179
- string 'airline_iata_code'
180
- string 'origin_airport_iata_code'
181
- string 'origin_country_iso_3166_code'
182
- string 'destination_airport_iata_code'
183
- string 'destination_country_iso_3166_code'
184
- integer 'year'
185
- integer 'quarter'
186
- integer 'month'
187
- date 'approximate_date'
188
- # integer 'bts_aircraft_group_code'
189
- # string 'configuration_id'
190
- # integer 'bts_aircraft_configuration_code'
191
- # string 'distance_group'
192
- # integer 'bts_distance_group_code'
193
- # string 'bts_data_source_code'
194
- # integer 'ramp_to_ramp'
195
- # integer 'air_time'
196
- # integer 'departures_scheduled'
197
- # string 'dot_airline_id_code'
198
- # string 'unique_carrier_name'
199
- # string 'unique_carrier_entity'
200
- # string 'region'
201
- # string 'current_airline_iata_code'
202
- # string 'carrier_name'
203
- # integer 'carrier_group'
204
- # integer 'carrier_group_new'
205
- # string 'origin_city_name'
206
- # integer 'origin_city_num'
207
- # string 'origin_state_abr'
208
- # string 'origin_state_fips'
209
- # string 'origin_state_nm'
210
- # string 'origin_country_name'
211
- # integer 'origin_wac'
212
- # string 'dest_city_name'
213
- # integer 'dest_city_num'
214
- # string 'dest_state_abr'
215
- # string 'dest_state_fips'
216
- # string 'dest_state_nm'
217
- # string 'dest_country_name'
218
- # integer 'dest_wac'
219
- index 'airline_iata_code'
220
- index 'aircraft_bts_code'
221
- index 'origin_airport_iata_code'
222
- index 'destination_airport_iata_code'
166
+ string 'row_hash' # auto-generated primary key
167
+ string 'origin_airport_iata_code' # iata code
168
+ string 'origin_airport_city' # city
169
+ string 'origin_country_iso_3166_code' # iso code
170
+ string 'destination_airport_iata_code' # iata code
171
+ string 'destination_airport_city' # city
172
+ string 'destination_country_iso_3166_code' # iso code
173
+ string 'airline_bts_code' # bts code
174
+ string 'airline_icao_code' # icao code
175
+ string 'airline_name' # text description derived from bts or icao code
176
+ string 'aircraft_bts_code' # bts code
177
+ string 'aircraft_description' # text description derived from BTS T100 or ICAO TFS
178
+ integer 'flights' # number of flights over month or year
179
+ integer 'passengers' # total passengers on all flights
180
+ integer 'seats' # total seats on all flights
181
+ float 'seats_per_flight' # average seats per flight; make this a float
182
+ float 'load_factor' # passengers / seats
183
+ float 'freight_share' # (freight + mail) / (freight + mail + (passengers * average passenger weight))
184
+ float 'distance' # flight distance
185
+ string 'distance_units' # 'kilometres'
186
+ float 'payload_capacity' # aircraft maximum payload capacity rating; float b/c unit conversion
187
+ string 'payload_capacity_units' # 'kilograms'
188
+ float 'freight' # total freight on all flights performed; float b/c unit conversion
189
+ string 'freight_units' # 'kilograms'
190
+ float 'mail' # total mail on all flights performed; float b/c unit conversion
191
+ string 'mail_units' # 'kilograms'
192
+ integer 'month' # month of flight
193
+ integer 'year' # year of flight
194
+ date 'approximate_date' # assumed 14th day of month
195
+ string 'source' # 'BTS T100' or 'ICAO TFS'
196
+ index 'origin_airport_iata_code' # index for faster lookup by origin airport
197
+ index 'origin_airport_city' # index for faster lookup by origin city
198
+ index 'destination_airport_iata_code' # index for faster lookup by destination airport
199
+ index 'destination_airport_city' # index for faster lookup by destination city
200
+ index 'airline_bts_code' # index for faster lookup by airline bts code
201
+ index 'airline_icao_code' # index for faster lookup by airline icao code
202
+ index 'airline_name' # index for faster lookup by airline name
203
+ index 'aircraft_bts_code' # index for faster lookup by aircraft bts code
204
+ index 'aircraft_description' # index for faster lookup by aircraft
205
+ index 'year' # index for faster lookup by year
223
206
  end
224
207
 
225
208
  months = Hash.new
226
- (2008..2009).each do |year|
227
- # (2008..2008).each do |year| # DEBUG MODE!
209
+ (2009..2010).each do |year|
228
210
  (1..12).each do |month|
229
- # (1..1).each do |month| # DEBUG MODE!
230
211
  time = Time.gm year, month
231
212
  form_data = FORM_DATA.dup
232
213
  form_data.gsub! '__YEAR__', time.year.to_s
@@ -235,96 +216,156 @@ FlightSegment.class_eval do
235
216
  months[time] = form_data
236
217
  end
237
218
  end
238
- # creating dictionaries by hand so that a new one doesn't get created for every month
239
- # distance_group_dictionary = DataMiner::Dictionary.new :input => 'Code', :output => 'Description', :url => 'http://www.transtats.bts.gov/Download_Lookup.asp?Lookup=L_DISTANCE_GROUP_500'
219
+
240
220
  months.each do |month, form_data|
241
- import "T100 flight segment data from #{month.strftime('%B %Y')}",
221
+ import "T100 flight segment data for #{month.strftime('%B %Y')}",
242
222
  :url => URL,
243
223
  :form_data => form_data,
244
224
  :compression => :zip,
245
- :glob => '/*.csv' do
246
-
225
+ :glob => '/*.csv',
226
+ :select => lambda { |record| record['DEPARTURES_PERFORMED'].to_i > 0 } do
247
227
  key 'row_hash'
248
-
249
- # store 'bts_aircraft_group_code', :field_name => 'AIRCRAFT_GROUP'
250
-
251
- # store 'configuration_id', :field_name => 'AIRCRAFT_CONFIG', :dictionary => configuration_dictionary
252
- # store 'bts_aircraft_configuration_code', :field_name => 'AIRCRAFT_CONFIG'
253
-
254
- # store 'distance_group', :field_name => 'DISTANCE_GROUP', :dictionary => distance_group_dictionary
255
- # store 'bts_distance_group_code', :field_name => 'DISTANCE_GROUP'
256
-
257
- # store 'bts_data_source_code', :field_name => 'DATA_SOURCE'
258
-
259
- # store 'departures_scheduled', :field_name => 'DEPARTURES_SCHEDULED'
260
- store 'departures_performed', :field_name => 'DEPARTURES_PERFORMED'
261
- store 'passengers', :field_name => 'PASSENGERS'
262
- store 'total_seats', :field_name => 'SEATS'
263
- store 'payload', :field_name => 'PAYLOAD', :from_units => :pounds, :to_units => :kilograms
264
- store 'freight', :field_name => 'FREIGHT', :from_units => :pounds, :to_units => :kilograms
265
- store 'mail', :field_name => 'MAIL', :from_units => :pounds, :to_units => :kilograms
266
- store 'distance', :field_name => 'DISTANCE', :from_units => :miles, :to_units => :kilometres
267
- store 'aircraft_bts_code', :field_name => 'AIRCRAFT_TYPE'
268
- store 'airline_iata_code', :field_name => 'UNIQUE_CARRIER'
269
228
  store 'origin_airport_iata_code', :field_name => 'ORIGIN'
270
229
  store 'origin_country_iso_3166_code', :field_name => 'ORIGIN_COUNTRY'
271
230
  store 'destination_airport_iata_code', :field_name => 'DEST'
272
231
  store 'destination_country_iso_3166_code', :field_name => 'DEST_COUNTRY'
273
- store 'year', :field_name => 'YEAR'
274
- store 'quarter', :field_name => 'QUARTER'
232
+ store 'airline_bts_code', :field_name => 'UNIQUE_CARRIER', :nullify => true
233
+ store 'aircraft_bts_code', :field_name => 'AIRCRAFT_TYPE'
234
+ store 'flights', :field_name => 'DEPARTURES_PERFORMED'
235
+ store 'passengers', :field_name => 'PASSENGERS'
236
+ store 'seats', :field_name => 'SEATS'
237
+ store 'payload_capacity', :field_name => 'PAYLOAD', :units => 'pounds'
238
+ store 'freight', :field_name => 'FREIGHT', :units => 'pounds'
239
+ store 'mail', :field_name => 'MAIL', :units => 'pounds'
240
+ store 'distance', :field_name => 'DISTANCE', :units => 'miles'
275
241
  store 'month', :field_name => 'MONTH'
276
- # store 'ramp_to_ramp', :field_name => 'RAMP_TO_RAMP'
277
- # store 'air_time', :field_name => 'AIR_TIME'
278
- # store 'dot_airline_id_code', :field_name => 'AIRLINE_ID'
279
- # store 'unique_carrier_name', :field_name => 'UNIQUE_CARRIER_NAME'
280
- # store 'unique_carrier_entity', :field_name => 'UNIQUE_CARRIER_ENTITY'
281
- # store 'region', :field_name => 'REGION'
282
- # store 'current_airline_iata_code', :field_name => 'CARRIER'
283
- # store 'carrier_name', :field_name => 'CARRIER_NAME'
284
- # store 'carrier_group', :field_name => 'CARRIER_GROUP'
285
- # store 'carrier_group_new', :field_name => 'CARRIER_GROUP_NEW'
286
- # store 'origin_city_name', :field_name => 'ORIGIN_CITY_NAME'
287
- # store 'origin_city_num', :field_name => 'ORIGIN_CITY_NUM'
288
- # store 'origin_state_abr', :field_name => 'ORIGIN_STATE_ABR'
289
- # store 'origin_state_fips', :field_name => 'ORIGIN_STATE_FIPS'
290
- # store 'origin_state_nm', :field_name => 'ORIGIN_STATE_NM'
291
- # store 'origin_country_name', :field_name => 'ORIGIN_COUNTRY_NAME'
292
- # store 'origin_wac', :field_name => 'ORIGIN_WAC'
293
- # store 'dest_city_name', :field_name => 'DEST_CITY_NAME'
294
- # store 'dest_city_num', :field_name => 'DEST_CITY_NUM'
295
- # store 'dest_state_abr', :field_name => 'DEST_STATE_ABR'
296
- # store 'dest_state_fips', :field_name => 'DEST_STATE_FIPS'
297
- # store 'dest_state_nm', :field_name => 'DEST_STATE_NM'
298
- # store 'dest_country_name', :field_name => 'DEST_COUNTRY_NAME'
299
- # store 'dest_wac', :field_name => 'DEST_WAC'
242
+ store 'year', :field_name => 'YEAR'
243
+ store 'source', :static => 'BTS T100'
300
244
  end
301
245
  end
302
246
 
303
- process "Derive freight share as a fraction of the total weight carried" do
304
- update_all 'freight_share = (freight + mail) / (freight + mail + (passengers * 90.718474))', '(freight + mail + passengers) > 0'
247
+ # verify origin_airport_iata_code is in airports
248
+ # verify destination_airport_iata_code is in airports
249
+ # verify origin_country_iso_3166_code is in countries
250
+ # verify destination_country_iso_3166_code is in countries
251
+ # verify airline_bts_code appears in airlines
252
+ # verify aircraft_description is never missing
253
+ # verify year is never missing
254
+
255
+ process "Ensure Airline is populated" do
256
+ Airline.run_data_miner!
257
+ end
258
+
259
+ process "Look up airline name based on BTS code" do
260
+ connection.select_values("SELECT DISTINCT airline_bts_code FROM flight_segments WHERE airline_bts_code IS NOT NULL").each do |bts_code|
261
+ name = Airline.find_by_bts_code(bts_code).name
262
+ update_all %{ airline_name = "#{name}" }, %{ airline_bts_code = "#{bts_code}" }
263
+ end
264
+ end
265
+
266
+ process "Ensure BtsAircraft is populated" do
267
+ BtsAircraft.run_data_miner!
268
+ end
269
+
270
+ process "Look up aircraft description based on BTS code" do
271
+ connection.select_values("SELECT DISTINCT aircraft_bts_code FROM flight_segments WHERE aircraft_bts_code IS NOT NULL").each do |bts_code|
272
+ description = BtsAircraft.find_by_bts_code(bts_code).description.downcase
273
+ update_all %{ aircraft_description = "#{description}" }, %{ aircraft_bts_code = "#{bts_code}" }
274
+ end
275
+ end
276
+
277
+ %w{ payload_capacity freight mail }.each do |field|
278
+ process "Convert #{field} from pounds to kilograms" do
279
+ conversion_factor = 1.pounds.to(:kilograms)
280
+ connection.execute %{
281
+ UPDATE flight_segments
282
+ SET #{field} = #{field} * #{conversion_factor},
283
+ #{field + '_units'} = 'kilograms'
284
+ WHERE #{field + '_units'} = 'pounds'
285
+ }
286
+ end
305
287
  end
306
288
 
307
- process "Derive load factor, which is passengers divided by the total seats available" do
308
- update_all 'load_factor = passengers / total_seats', 'total_seats > 0'
289
+ process "Convert distance from miles to kilometres" do
290
+ conversion_factor = 1.miles.to(:kilometres)
291
+ connection.execute %{
292
+ UPDATE flight_segments
293
+ SET distance = distance * #{conversion_factor},
294
+ distance_units = 'kilometres'
295
+ WHERE distance_units = 'miles'
296
+ }
309
297
  end
310
298
 
311
- process "Assume a load factor of 1 where passengers > total seats available" do
312
- update_all 'load_factor = 1', 'passengers > total_seats AND total_seats > 0'
299
+ process "Derive load factor, which is passengers divided by available seats" do
300
+ update_all 'load_factor = passengers / seats', 'seats > 0'
313
301
  end
314
302
 
315
- process "Derive average seats per departure" do
316
- update_all 'seats = total_seats / departures_performed', 'departures_performed > 0'
303
+ process "Assume a load factor of 1 where passengers > available seats" do
304
+ update_all 'load_factor = 1', 'passengers > seats AND seats > 0'
305
+ end
306
+
307
+ process "Derive freight share as a fraction of the total weight carried" do
308
+ update_all 'freight_share = (freight + mail) / (freight + mail + (passengers * 90.718474))', '(freight + mail + passengers) > 0'
309
+ end
310
+
311
+ process "Derive average seats per flight" do
312
+ update_all 'seats_per_flight = seats / flights', 'flights > 0'
317
313
  end
318
314
 
319
315
  process "Add a useful date field" do
320
- update_all 'approximate_date = DATE(CONCAT_WS("-", year, month, "14"))'
316
+ update_all 'approximate_date = DATE(CONCAT_WS("-", year, month, "14"))', 'month IS NOT NULL'
321
317
  end
322
318
 
323
- # FIXME TODO make this verification check actual aircraft codes in Aircraft
324
- verify 'All segments have an associated aircraft' do
325
- FlightSegment.where(:aircraft_bts_code => [nil, '']).first.nil?
319
+ process "Ensure Aircraft is populated" do
320
+ Aircraft.run_data_miner!
326
321
  end
327
322
 
328
- # FIXME TODO finish verification
323
+ process "Cache fuzzy matches between FlightSegment aircraft_description and Aircraft description" do
324
+ LooseTightDictionary::CachedResult.setup
325
+ FlightSegment.find_by_sql("SELECT DISTINCT aircraft_description FROM flight_segments WHERE aircraft_description IS NOT NULL").each do |flight_segment|
326
+ original_description = flight_segment.aircraft_description
327
+
328
+ # If the flight segment's aircraft_description contains '/' then it describes multiple aircraft.
329
+ # We need to synthesize descriptions for those aircraft, find all Aircraft that fuzzily match the
330
+ # synthesized descriptions, and associate those Aircraft with the original aircraft_description.
331
+ # e.g. boeing 747-100/200
332
+ if original_description.include?("/")
333
+ # Pull out the complete first aircraft description
334
+ # e.g. 'boeing 747-100'
335
+ first_description = original_description.split('/')[0]
336
+
337
+ # Pull out the root of the description - the text up to and including the last ' ' or '-'
338
+ # e.g. 'boeing 747-'
339
+ root_length = first_description.rindex('-')
340
+ root = first_description.slice(0..root_length)
341
+
342
+ # Pull out the suffixes - the text separated by forward slashes
343
+ # e.g. ['100', '200']
344
+ suffixes = original_description.split(root)[1].split('/')
345
+
346
+ # Create an array of synthesized descriptions by appending each suffix to the root
347
+ # e.g. ['boeing 747-100', 'boeing 747-200']
348
+ suffixes.map{ |suffix| root + suffix }.each do |synthesized_description|
349
+ # Look up the Aircraft that match each synthesized description and associate
350
+ # them with the original flight segment aircraft_description
351
+ Aircraft.loose_tight_dictionary.find_all(synthesized_description).each do |aircraft|
352
+ attrs = {
353
+ :a_class => "Aircraft",
354
+ :a => aircraft.description,
355
+ :b_class => "FlightSegment",
356
+ :b => original_description
357
+ }
358
+ unless ::LooseTightDictionary::CachedResult.exists? attrs
359
+ ::LooseTightDictionary::CachedResult.create! attrs
360
+ end
361
+ end
362
+ end
363
+ # If the flight segment's aircraft_description doesn't contain '/' we can use
364
+ # a method provided by loose_tight_dictionary to associate it with Aircraft
365
+ else
366
+ flight_segment.cache_aircraft!
367
+ end
368
+ end
369
+ end
329
370
  end
330
371
  end
@@ -10,8 +10,11 @@ AutomobileMake.class_eval do
10
10
  string 'fuel_efficiency_units'
11
11
  end
12
12
 
13
- process "Derive manufacturer names from automobile make model year variants" do
13
+ process "Ensure AutomobileMakeModelYearVariant is populated" do
14
14
  AutomobileMakeModelYearVariant.run_data_miner!
15
+ end
16
+
17
+ process "Derive manufacturer names from automobile make model year variants" do
15
18
  INSERT_IGNORE %{INTO automobile_makes(name)
16
19
  SELECT DISTINCT automobile_make_model_year_variants.make_name
17
20
  FROM automobile_make_model_year_variants
@@ -20,18 +23,24 @@ AutomobileMake.class_eval do
20
23
  }
21
24
  end
22
25
 
26
+ process "Ensure AutomobileMakeFleetYear is populated" do
27
+ AutomobileMakeFleetYear.run_data_miner!
28
+ end
29
+
23
30
  # sabshere 1/31/11 add Avanti, DaimlerChrysler, IHC, Tesla, etc.
24
31
  process "Derive extra manufacturer names from CAFE data" do
25
- AutomobileMakeFleetYear.run_data_miner!
26
32
  INSERT_IGNORE %{INTO automobile_makes(name)
27
33
  SELECT DISTINCT automobile_make_fleet_years.make_name
28
34
  FROM automobile_make_fleet_years
29
35
  }
30
36
  end
31
37
 
38
+ process "Ensure AutomobileMakeFleetYear is populated" do
39
+ AutomobileMakeFleetYear.run_data_miner!
40
+ end
41
+
32
42
  # FIXME TODO make this a method on AutomobileMake?
33
43
  process "Calculate fuel efficiency from automobile make fleet years for makes with CAFE data" do
34
- AutomobileMakeFleetYear.run_data_miner!
35
44
  make_fleet_years = AutomobileMakeFleetYear.arel_table
36
45
  makes = AutomobileMake.arel_table
37
46
  conditional_relation = makes[:name].eq(make_fleet_years[:make_name])