earth 0.4.12 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Gemfile +1 -1
- data/earth.gemspec +7 -3
- data/features/automobile_make_model_year_variant.feature +10 -10
- data/features/step_definitions/data_steps.rb +1 -1
- data/features/support/env.rb +2 -2
- data/features/support/imports/automobile_fuel_good.csv +3 -3
- data/features/support/imports/automobile_make_model_year_variant_bad.csv +2 -2
- data/features/support/imports/automobile_make_model_year_variant_good.csv +2 -2
- data/lib/earth/air.rb +2 -1
- data/lib/earth/air/aircraft.rb +21 -8
- data/lib/earth/air/aircraft/data_miner.rb +85 -180
- data/lib/earth/air/aircraft_class.rb +9 -1
- data/lib/earth/air/aircraft_class/data_miner.rb +57 -20
- data/lib/earth/air/aircraft_fuel_use_equation.rb +26 -0
- data/lib/earth/air/aircraft_fuel_use_equation/data_miner.rb +26 -0
- data/lib/earth/air/aircraft_manufacturer.rb +0 -2
- data/lib/earth/air/aircraft_manufacturer/data_miner.rb +4 -1
- data/lib/earth/air/airline.rb +2 -11
- data/lib/earth/air/airline/data_miner.rb +10 -51
- data/lib/earth/air/airport.rb +2 -29
- data/lib/earth/air/airport/data_miner.rb +61 -56
- data/lib/earth/air/bts_aircraft.rb +7 -0
- data/lib/earth/air/bts_aircraft/data_miner.rb +15 -0
- data/lib/earth/air/data_miner.rb +2 -1
- data/lib/earth/air/flight_seat_class.rb +1 -4
- data/lib/earth/air/flight_segment.rb +16 -10
- data/lib/earth/air/flight_segment/data_miner.rb +170 -129
- data/lib/earth/automobile/automobile_make/data_miner.rb +12 -3
- data/lib/earth/automobile/automobile_make_model/data_miner.rb +4 -1
- data/lib/earth/automobile/automobile_make_model_year/data_miner.rb +4 -1
- data/lib/earth/automobile/automobile_make_model_year_variant/data_miner.rb +29 -16
- data/lib/earth/automobile/automobile_make_year/data_miner.rb +8 -2
- data/lib/earth/automobile/automobile_size_class/data_miner.rb +4 -1
- data/lib/earth/automobile/automobile_type_fuel_age/data_miner.rb +4 -1
- data/lib/earth/automobile/automobile_type_fuel_year/data_miner.rb +4 -2
- data/lib/earth/automobile/automobile_type_fuel_year_age/data_miner.rb +4 -1
- data/lib/earth/automobile/automobile_type_year/data_miner.rb +4 -1
- data/lib/earth/fuel/fuel/data_miner.rb +4 -1
- data/lib/earth/hospitality/lodging_class.rb +0 -16
- data/lib/earth/hospitality/lodging_class/data_miner.rb +11 -1
- data/lib/earth/inflectors.rb +1 -1
- data/lib/earth/locality/country/data_miner.rb +14 -14
- data/lib/earth/locality/urbanity/data_miner.rb +4 -1
- data/lib/earth/residence/air_conditioner_use/data_miner.rb +4 -1
- data/lib/earth/residence/clothes_machine_use/data_miner.rb +4 -1
- data/lib/earth/residence/dishwasher_use/data_miner.rb +4 -1
- data/lib/earth/residence/residence_appliance/data_miner.rb +5 -2
- data/lib/earth/residence/residence_class/data_miner.rb +4 -1
- data/lib/earth/version.rb +1 -1
- data/spec/earth_spec.rb +2 -2
- data/test/test_aircraft_match.rb +732 -0
- metadata +87 -51
- data/features/flight_segment.feature +0 -14
- data/features/support/imports/flight_segment_bad.csv +0 -8
- data/features/support/imports/flight_segment_good.csv +0 -8
- data/lib/earth/air/flight_fuel_type.rb +0 -12
- data/lib/earth/air/flight_fuel_type/data_miner.rb +0 -12
@@ -0,0 +1,15 @@
|
|
1
|
+
BtsAircraft.class_eval do
|
2
|
+
data_miner do
|
3
|
+
schema Earth.database_options do
|
4
|
+
string 'bts_code'
|
5
|
+
string 'description'
|
6
|
+
end
|
7
|
+
|
8
|
+
import "the BTS aircraft type lookup table",
|
9
|
+
:url => "http://www.transtats.bts.gov/Download_Lookup.asp?Lookup=L_AIRCRAFT_TYPE",
|
10
|
+
:errata => { :url => 'https://spreadsheets.google.com/spreadsheet/pub?key=0AoQJbWqPrREqdEZ2d3JQMzV5T1o1T3JmVlFyNUZxdEE&output=csv' } do
|
11
|
+
key 'bts_code', :field_name => 'Code'
|
12
|
+
store 'description', :field_name => 'Description'
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/earth/air/data_miner.rb
CHANGED
@@ -2,10 +2,11 @@ require 'earth/locality'
|
|
2
2
|
require 'earth/locality/data_miner'
|
3
3
|
require 'earth/air/aircraft/data_miner'
|
4
4
|
require 'earth/air/aircraft_class/data_miner'
|
5
|
+
require 'earth/air/aircraft_fuel_use_equation/data_miner'
|
5
6
|
require 'earth/air/aircraft_manufacturer/data_miner'
|
6
7
|
require 'earth/air/airline/data_miner'
|
7
8
|
require 'earth/air/airport/data_miner'
|
9
|
+
require 'earth/air/bts_aircraft/data_miner'
|
8
10
|
require 'earth/air/flight_distance_class/data_miner'
|
9
|
-
require 'earth/air/flight_fuel_type/data_miner'
|
10
11
|
require 'earth/air/flight_seat_class/data_miner'
|
11
12
|
require 'earth/air/flight_segment/data_miner'
|
@@ -1,11 +1,8 @@
|
|
1
1
|
class FlightSeatClass < ActiveRecord::Base
|
2
2
|
set_primary_key :name
|
3
|
-
# has_many :airline_seat_classes, :class_name => 'AirlineSeatClass'
|
4
|
-
# has_many :aircraft_seat_classes, :class_name => 'AircraftSeatClass'
|
5
|
-
# has_many :airline_aircraft_seat_classes, :class_name => 'AirlineAircraftSeatClass'
|
6
3
|
|
7
4
|
falls_back_on :multiplier => 1
|
8
|
-
|
5
|
+
|
9
6
|
data_miner do
|
10
7
|
tap "Brighter Planet's sanitized flight seat class data", Earth.taps_server
|
11
8
|
end
|
@@ -1,23 +1,29 @@
|
|
1
|
+
# need this for association with Aircraft through loose_tight_dictionary_cached_results
|
2
|
+
require 'loose_tight_dictionary/cached_result'
|
3
|
+
|
1
4
|
class FlightSegment < ActiveRecord::Base
|
2
5
|
set_primary_key :row_hash
|
3
6
|
|
4
7
|
extend CohortScope
|
5
8
|
self.minimum_cohort_size = 1
|
6
9
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
10
|
+
# If airport iata code is missing, associate with all airports in a city
|
11
|
+
# We need this to calculate distance when importing ICAO segments - see cm1 flight_segment.rb
|
12
|
+
has_many :origin_city_airports, :foreign_key => 'city', :primary_key => 'origin_airport_city', :class_name => 'Airport'
|
13
|
+
has_many :destination_city_airports, :foreign_key => 'city', :primary_key => 'destination_airport_city', :class_name => 'Airport'
|
11
14
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
15
|
+
# Enable flight_segment.aircraft
|
16
|
+
cache_loose_tight_dictionary_matches_with :aircraft, :primary_key => :aircraft_description, :foreign_key => :description
|
17
|
+
|
18
|
+
falls_back_on :distance => lambda { weighted_average(:distance, :weighted_by => :passengers) }, # 2077.1205 data1 10-12-2010
|
19
|
+
:seats_per_flight => lambda { weighted_average(:seats_per_flight, :weighted_by => :passengers) }, # 144.15653537046 data1 10-12-2010
|
20
|
+
:load_factor => lambda { weighted_average(:load_factor, :weighted_by => :passengers) }, # 0.78073233770097 data1 10-12-2010
|
21
|
+
:freight_share => lambda { weighted_average(:freight_share, :weighted_by => :passengers) } # 0.022567224170157 data1 10-12-2010
|
16
22
|
|
17
23
|
data_miner do
|
18
|
-
tap "Brighter Planet's sanitized
|
24
|
+
tap "Brighter Planet's sanitized flight segment data", Earth.taps_server
|
19
25
|
|
20
|
-
process "
|
26
|
+
process "Pull dependencies" do
|
21
27
|
run_data_miner_on_belongs_to_associations
|
22
28
|
end
|
23
29
|
end
|
@@ -1,3 +1,6 @@
|
|
1
|
+
# need this to run flight_segment.cache_aircraft!
|
2
|
+
require 'loose_tight_dictionary/cached_result'
|
3
|
+
|
1
4
|
FlightSegment.class_eval do
|
2
5
|
URL = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
|
3
6
|
FORM_DATA = %{
|
@@ -157,76 +160,54 @@ FlightSegment.class_eval do
|
|
157
160
|
VarDesc=DataSource&
|
158
161
|
VarType=Char
|
159
162
|
}.gsub /[\s]+/,''
|
160
|
-
|
163
|
+
|
161
164
|
data_miner do
|
162
165
|
schema Earth.database_options do
|
163
|
-
string
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
string
|
169
|
-
|
170
|
-
string
|
171
|
-
|
172
|
-
string
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
string
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
#
|
189
|
-
|
190
|
-
|
191
|
-
#
|
192
|
-
#
|
193
|
-
|
194
|
-
#
|
195
|
-
#
|
196
|
-
#
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
# integer 'carrier_group'
|
204
|
-
# integer 'carrier_group_new'
|
205
|
-
# string 'origin_city_name'
|
206
|
-
# integer 'origin_city_num'
|
207
|
-
# string 'origin_state_abr'
|
208
|
-
# string 'origin_state_fips'
|
209
|
-
# string 'origin_state_nm'
|
210
|
-
# string 'origin_country_name'
|
211
|
-
# integer 'origin_wac'
|
212
|
-
# string 'dest_city_name'
|
213
|
-
# integer 'dest_city_num'
|
214
|
-
# string 'dest_state_abr'
|
215
|
-
# string 'dest_state_fips'
|
216
|
-
# string 'dest_state_nm'
|
217
|
-
# string 'dest_country_name'
|
218
|
-
# integer 'dest_wac'
|
219
|
-
index 'airline_iata_code'
|
220
|
-
index 'aircraft_bts_code'
|
221
|
-
index 'origin_airport_iata_code'
|
222
|
-
index 'destination_airport_iata_code'
|
166
|
+
string 'row_hash' # auto-generated primary key
|
167
|
+
string 'origin_airport_iata_code' # iata code
|
168
|
+
string 'origin_airport_city' # city
|
169
|
+
string 'origin_country_iso_3166_code' # iso code
|
170
|
+
string 'destination_airport_iata_code' # iata code
|
171
|
+
string 'destination_airport_city' # city
|
172
|
+
string 'destination_country_iso_3166_code' # iso code
|
173
|
+
string 'airline_bts_code' # bts code
|
174
|
+
string 'airline_icao_code' # icao code
|
175
|
+
string 'airline_name' # text description derived from bts or icao code
|
176
|
+
string 'aircraft_bts_code' # bts code
|
177
|
+
string 'aircraft_description' # text description derived from BTS T100 or ICAO TFS
|
178
|
+
integer 'flights' # number of flights over month or year
|
179
|
+
integer 'passengers' # total passengers on all flights
|
180
|
+
integer 'seats' # total seats on all flights
|
181
|
+
float 'seats_per_flight' # average seats per flight; make this a float
|
182
|
+
float 'load_factor' # passengers / seats
|
183
|
+
float 'freight_share' # (freight + mail) / (freight + mail + (passengers * average passenger weight))
|
184
|
+
float 'distance' # flight distance
|
185
|
+
string 'distance_units' # 'kilometres'
|
186
|
+
float 'payload_capacity' # aircraft maximum payload capacity rating; float b/c unit conversion
|
187
|
+
string 'payload_capacity_units' # 'kilograms'
|
188
|
+
float 'freight' # total freight on all flights performed; float b/c unit conversion
|
189
|
+
string 'freight_units' # 'kilograms'
|
190
|
+
float 'mail' # total mail on all flights performed; float b/c unit conversion
|
191
|
+
string 'mail_units' # 'kilograms'
|
192
|
+
integer 'month' # month of flight
|
193
|
+
integer 'year' # year of flight
|
194
|
+
date 'approximate_date' # assumed 14th day of month
|
195
|
+
string 'source' # 'BTS T100' or 'ICAO TFS'
|
196
|
+
index 'origin_airport_iata_code' # index for faster lookup by origin airport
|
197
|
+
index 'origin_airport_city' # index for faster lookup by origin city
|
198
|
+
index 'destination_airport_iata_code' # index for faster lookup by destination airport
|
199
|
+
index 'destination_airport_city' # index for faster lookup by destination city
|
200
|
+
index 'airline_bts_code' # index for faster lookup by airline bts code
|
201
|
+
index 'airline_icao_code' # index for faster lookup by airline icao code
|
202
|
+
index 'airline_name' # index for faster lookup by airline name
|
203
|
+
index 'aircraft_bts_code' # index for faster lookup by aircraft bts code
|
204
|
+
index 'aircraft_description' # index for faster lookup by aircraft
|
205
|
+
index 'year' # index for faster lookup by year
|
223
206
|
end
|
224
207
|
|
225
208
|
months = Hash.new
|
226
|
-
(
|
227
|
-
# (2008..2008).each do |year| # DEBUG MODE!
|
209
|
+
(2009..2010).each do |year|
|
228
210
|
(1..12).each do |month|
|
229
|
-
# (1..1).each do |month| # DEBUG MODE!
|
230
211
|
time = Time.gm year, month
|
231
212
|
form_data = FORM_DATA.dup
|
232
213
|
form_data.gsub! '__YEAR__', time.year.to_s
|
@@ -235,96 +216,156 @@ FlightSegment.class_eval do
|
|
235
216
|
months[time] = form_data
|
236
217
|
end
|
237
218
|
end
|
238
|
-
|
239
|
-
# distance_group_dictionary = DataMiner::Dictionary.new :input => 'Code', :output => 'Description', :url => 'http://www.transtats.bts.gov/Download_Lookup.asp?Lookup=L_DISTANCE_GROUP_500'
|
219
|
+
|
240
220
|
months.each do |month, form_data|
|
241
|
-
import "T100 flight segment data
|
221
|
+
import "T100 flight segment data for #{month.strftime('%B %Y')}",
|
242
222
|
:url => URL,
|
243
223
|
:form_data => form_data,
|
244
224
|
:compression => :zip,
|
245
|
-
:glob => '/*.csv'
|
246
|
-
|
225
|
+
:glob => '/*.csv',
|
226
|
+
:select => lambda { |record| record['DEPARTURES_PERFORMED'].to_i > 0 } do
|
247
227
|
key 'row_hash'
|
248
|
-
|
249
|
-
# store 'bts_aircraft_group_code', :field_name => 'AIRCRAFT_GROUP'
|
250
|
-
|
251
|
-
# store 'configuration_id', :field_name => 'AIRCRAFT_CONFIG', :dictionary => configuration_dictionary
|
252
|
-
# store 'bts_aircraft_configuration_code', :field_name => 'AIRCRAFT_CONFIG'
|
253
|
-
|
254
|
-
# store 'distance_group', :field_name => 'DISTANCE_GROUP', :dictionary => distance_group_dictionary
|
255
|
-
# store 'bts_distance_group_code', :field_name => 'DISTANCE_GROUP'
|
256
|
-
|
257
|
-
# store 'bts_data_source_code', :field_name => 'DATA_SOURCE'
|
258
|
-
|
259
|
-
# store 'departures_scheduled', :field_name => 'DEPARTURES_SCHEDULED'
|
260
|
-
store 'departures_performed', :field_name => 'DEPARTURES_PERFORMED'
|
261
|
-
store 'passengers', :field_name => 'PASSENGERS'
|
262
|
-
store 'total_seats', :field_name => 'SEATS'
|
263
|
-
store 'payload', :field_name => 'PAYLOAD', :from_units => :pounds, :to_units => :kilograms
|
264
|
-
store 'freight', :field_name => 'FREIGHT', :from_units => :pounds, :to_units => :kilograms
|
265
|
-
store 'mail', :field_name => 'MAIL', :from_units => :pounds, :to_units => :kilograms
|
266
|
-
store 'distance', :field_name => 'DISTANCE', :from_units => :miles, :to_units => :kilometres
|
267
|
-
store 'aircraft_bts_code', :field_name => 'AIRCRAFT_TYPE'
|
268
|
-
store 'airline_iata_code', :field_name => 'UNIQUE_CARRIER'
|
269
228
|
store 'origin_airport_iata_code', :field_name => 'ORIGIN'
|
270
229
|
store 'origin_country_iso_3166_code', :field_name => 'ORIGIN_COUNTRY'
|
271
230
|
store 'destination_airport_iata_code', :field_name => 'DEST'
|
272
231
|
store 'destination_country_iso_3166_code', :field_name => 'DEST_COUNTRY'
|
273
|
-
store '
|
274
|
-
store '
|
232
|
+
store 'airline_bts_code', :field_name => 'UNIQUE_CARRIER', :nullify => true
|
233
|
+
store 'aircraft_bts_code', :field_name => 'AIRCRAFT_TYPE'
|
234
|
+
store 'flights', :field_name => 'DEPARTURES_PERFORMED'
|
235
|
+
store 'passengers', :field_name => 'PASSENGERS'
|
236
|
+
store 'seats', :field_name => 'SEATS'
|
237
|
+
store 'payload_capacity', :field_name => 'PAYLOAD', :units => 'pounds'
|
238
|
+
store 'freight', :field_name => 'FREIGHT', :units => 'pounds'
|
239
|
+
store 'mail', :field_name => 'MAIL', :units => 'pounds'
|
240
|
+
store 'distance', :field_name => 'DISTANCE', :units => 'miles'
|
275
241
|
store 'month', :field_name => 'MONTH'
|
276
|
-
|
277
|
-
|
278
|
-
# store 'dot_airline_id_code', :field_name => 'AIRLINE_ID'
|
279
|
-
# store 'unique_carrier_name', :field_name => 'UNIQUE_CARRIER_NAME'
|
280
|
-
# store 'unique_carrier_entity', :field_name => 'UNIQUE_CARRIER_ENTITY'
|
281
|
-
# store 'region', :field_name => 'REGION'
|
282
|
-
# store 'current_airline_iata_code', :field_name => 'CARRIER'
|
283
|
-
# store 'carrier_name', :field_name => 'CARRIER_NAME'
|
284
|
-
# store 'carrier_group', :field_name => 'CARRIER_GROUP'
|
285
|
-
# store 'carrier_group_new', :field_name => 'CARRIER_GROUP_NEW'
|
286
|
-
# store 'origin_city_name', :field_name => 'ORIGIN_CITY_NAME'
|
287
|
-
# store 'origin_city_num', :field_name => 'ORIGIN_CITY_NUM'
|
288
|
-
# store 'origin_state_abr', :field_name => 'ORIGIN_STATE_ABR'
|
289
|
-
# store 'origin_state_fips', :field_name => 'ORIGIN_STATE_FIPS'
|
290
|
-
# store 'origin_state_nm', :field_name => 'ORIGIN_STATE_NM'
|
291
|
-
# store 'origin_country_name', :field_name => 'ORIGIN_COUNTRY_NAME'
|
292
|
-
# store 'origin_wac', :field_name => 'ORIGIN_WAC'
|
293
|
-
# store 'dest_city_name', :field_name => 'DEST_CITY_NAME'
|
294
|
-
# store 'dest_city_num', :field_name => 'DEST_CITY_NUM'
|
295
|
-
# store 'dest_state_abr', :field_name => 'DEST_STATE_ABR'
|
296
|
-
# store 'dest_state_fips', :field_name => 'DEST_STATE_FIPS'
|
297
|
-
# store 'dest_state_nm', :field_name => 'DEST_STATE_NM'
|
298
|
-
# store 'dest_country_name', :field_name => 'DEST_COUNTRY_NAME'
|
299
|
-
# store 'dest_wac', :field_name => 'DEST_WAC'
|
242
|
+
store 'year', :field_name => 'YEAR'
|
243
|
+
store 'source', :static => 'BTS T100'
|
300
244
|
end
|
301
245
|
end
|
302
246
|
|
303
|
-
|
304
|
-
|
247
|
+
# verify origin_airport_iata_code is in airports
|
248
|
+
# verify destination_airport_iata_code is in airports
|
249
|
+
# verify origin_country_iso_3166_code is in countries
|
250
|
+
# verify destination_country_iso_3166_code is in countries
|
251
|
+
# verify airline_bts_code appears in airlines
|
252
|
+
# verify aircraft_description is never missing
|
253
|
+
# verify year is never missing
|
254
|
+
|
255
|
+
process "Ensure Airline is populated" do
|
256
|
+
Airline.run_data_miner!
|
257
|
+
end
|
258
|
+
|
259
|
+
process "Look up airline name based on BTS code" do
|
260
|
+
connection.select_values("SELECT DISTINCT airline_bts_code FROM flight_segments WHERE airline_bts_code IS NOT NULL").each do |bts_code|
|
261
|
+
name = Airline.find_by_bts_code(bts_code).name
|
262
|
+
update_all %{ airline_name = "#{name}" }, %{ airline_bts_code = "#{bts_code}" }
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
266
|
+
process "Ensure BtsAircraft is populated" do
|
267
|
+
BtsAircraft.run_data_miner!
|
268
|
+
end
|
269
|
+
|
270
|
+
process "Look up aircraft description based on BTS code" do
|
271
|
+
connection.select_values("SELECT DISTINCT aircraft_bts_code FROM flight_segments WHERE aircraft_bts_code IS NOT NULL").each do |bts_code|
|
272
|
+
description = BtsAircraft.find_by_bts_code(bts_code).description.downcase
|
273
|
+
update_all %{ aircraft_description = "#{description}" }, %{ aircraft_bts_code = "#{bts_code}" }
|
274
|
+
end
|
275
|
+
end
|
276
|
+
|
277
|
+
%w{ payload_capacity freight mail }.each do |field|
|
278
|
+
process "Convert #{field} from pounds to kilograms" do
|
279
|
+
conversion_factor = 1.pounds.to(:kilograms)
|
280
|
+
connection.execute %{
|
281
|
+
UPDATE flight_segments
|
282
|
+
SET #{field} = #{field} * #{conversion_factor},
|
283
|
+
#{field + '_units'} = 'kilograms'
|
284
|
+
WHERE #{field + '_units'} = 'pounds'
|
285
|
+
}
|
286
|
+
end
|
305
287
|
end
|
306
288
|
|
307
|
-
process "
|
308
|
-
|
289
|
+
process "Convert distance from miles to kilometres" do
|
290
|
+
conversion_factor = 1.miles.to(:kilometres)
|
291
|
+
connection.execute %{
|
292
|
+
UPDATE flight_segments
|
293
|
+
SET distance = distance * #{conversion_factor},
|
294
|
+
distance_units = 'kilometres'
|
295
|
+
WHERE distance_units = 'miles'
|
296
|
+
}
|
309
297
|
end
|
310
298
|
|
311
|
-
process "
|
312
|
-
update_all 'load_factor =
|
299
|
+
process "Derive load factor, which is passengers divided by available seats" do
|
300
|
+
update_all 'load_factor = passengers / seats', 'seats > 0'
|
313
301
|
end
|
314
302
|
|
315
|
-
process "
|
316
|
-
update_all '
|
303
|
+
process "Assume a load factor of 1 where passengers > available seats" do
|
304
|
+
update_all 'load_factor = 1', 'passengers > seats AND seats > 0'
|
305
|
+
end
|
306
|
+
|
307
|
+
process "Derive freight share as a fraction of the total weight carried" do
|
308
|
+
update_all 'freight_share = (freight + mail) / (freight + mail + (passengers * 90.718474))', '(freight + mail + passengers) > 0'
|
309
|
+
end
|
310
|
+
|
311
|
+
process "Derive average seats per flight" do
|
312
|
+
update_all 'seats_per_flight = seats / flights', 'flights > 0'
|
317
313
|
end
|
318
314
|
|
319
315
|
process "Add a useful date field" do
|
320
|
-
update_all 'approximate_date = DATE(CONCAT_WS("-", year, month, "14"))'
|
316
|
+
update_all 'approximate_date = DATE(CONCAT_WS("-", year, month, "14"))', 'month IS NOT NULL'
|
321
317
|
end
|
322
318
|
|
323
|
-
|
324
|
-
|
325
|
-
FlightSegment.where(:aircraft_bts_code => [nil, '']).first.nil?
|
319
|
+
process "Ensure Aircraft is populated" do
|
320
|
+
Aircraft.run_data_miner!
|
326
321
|
end
|
327
322
|
|
328
|
-
|
323
|
+
process "Cache fuzzy matches between FlightSegment aircraft_description and Aircraft description" do
|
324
|
+
LooseTightDictionary::CachedResult.setup
|
325
|
+
FlightSegment.find_by_sql("SELECT DISTINCT aircraft_description FROM flight_segments WHERE aircraft_description IS NOT NULL").each do |flight_segment|
|
326
|
+
original_description = flight_segment.aircraft_description
|
327
|
+
|
328
|
+
# If the flight segment's aircraft_description contains '/' then it describes multiple aircraft.
|
329
|
+
# We need to synthesize descriptions for those aircraft, find all Aircraft that fuzzily match the
|
330
|
+
# synthesized descriptions, and associate those Aircraft with the original aircraft_description.
|
331
|
+
# e.g. boeing 747-100/200
|
332
|
+
if original_description.include?("/")
|
333
|
+
# Pull out the complete first aircraft description
|
334
|
+
# e.g. 'boeing 747-100'
|
335
|
+
first_description = original_description.split('/')[0]
|
336
|
+
|
337
|
+
# Pull out the root of the description - the text up to and including the last ' ' or '-'
|
338
|
+
# e.g. 'boeing 747-'
|
339
|
+
root_length = first_description.rindex('-')
|
340
|
+
root = first_description.slice(0..root_length)
|
341
|
+
|
342
|
+
# Pull out the suffixes - the text separated by forward slashes
|
343
|
+
# e.g. ['100', '200']
|
344
|
+
suffixes = original_description.split(root)[1].split('/')
|
345
|
+
|
346
|
+
# Create an array of synthesized descriptions by appending each suffix to the root
|
347
|
+
# e.g. ['boeing 747-100', 'boeing 747-200']
|
348
|
+
suffixes.map{ |suffix| root + suffix }.each do |synthesized_description|
|
349
|
+
# Look up the Aircraft that match each synthesized description and associate
|
350
|
+
# them with the original flight segment aircraft_description
|
351
|
+
Aircraft.loose_tight_dictionary.find_all(synthesized_description).each do |aircraft|
|
352
|
+
attrs = {
|
353
|
+
:a_class => "Aircraft",
|
354
|
+
:a => aircraft.description,
|
355
|
+
:b_class => "FlightSegment",
|
356
|
+
:b => original_description
|
357
|
+
}
|
358
|
+
unless ::LooseTightDictionary::CachedResult.exists? attrs
|
359
|
+
::LooseTightDictionary::CachedResult.create! attrs
|
360
|
+
end
|
361
|
+
end
|
362
|
+
end
|
363
|
+
# If the flight segment's aircraft_description doesn't contain '/' we can use
|
364
|
+
# a method provided by loose_tight_dictionary to associate it with Aircraft
|
365
|
+
else
|
366
|
+
flight_segment.cache_aircraft!
|
367
|
+
end
|
368
|
+
end
|
369
|
+
end
|
329
370
|
end
|
330
371
|
end
|
@@ -10,8 +10,11 @@ AutomobileMake.class_eval do
|
|
10
10
|
string 'fuel_efficiency_units'
|
11
11
|
end
|
12
12
|
|
13
|
-
process "
|
13
|
+
process "Ensure AutomobileMakeModelYearVariant is populated" do
|
14
14
|
AutomobileMakeModelYearVariant.run_data_miner!
|
15
|
+
end
|
16
|
+
|
17
|
+
process "Derive manufacturer names from automobile make model year variants" do
|
15
18
|
INSERT_IGNORE %{INTO automobile_makes(name)
|
16
19
|
SELECT DISTINCT automobile_make_model_year_variants.make_name
|
17
20
|
FROM automobile_make_model_year_variants
|
@@ -20,18 +23,24 @@ AutomobileMake.class_eval do
|
|
20
23
|
}
|
21
24
|
end
|
22
25
|
|
26
|
+
process "Ensure AutomobileMakeFleetYear is populated" do
|
27
|
+
AutomobileMakeFleetYear.run_data_miner!
|
28
|
+
end
|
29
|
+
|
23
30
|
# sabshere 1/31/11 add Avanti, DaimlerChrysler, IHC, Tesla, etc.
|
24
31
|
process "Derive extra manufacturer names from CAFE data" do
|
25
|
-
AutomobileMakeFleetYear.run_data_miner!
|
26
32
|
INSERT_IGNORE %{INTO automobile_makes(name)
|
27
33
|
SELECT DISTINCT automobile_make_fleet_years.make_name
|
28
34
|
FROM automobile_make_fleet_years
|
29
35
|
}
|
30
36
|
end
|
31
37
|
|
38
|
+
process "Ensure AutomobileMakeFleetYear is populated" do
|
39
|
+
AutomobileMakeFleetYear.run_data_miner!
|
40
|
+
end
|
41
|
+
|
32
42
|
# FIXME TODO make this a method on AutomobileMake?
|
33
43
|
process "Calculate fuel efficiency from automobile make fleet years for makes with CAFE data" do
|
34
|
-
AutomobileMakeFleetYear.run_data_miner!
|
35
44
|
make_fleet_years = AutomobileMakeFleetYear.arel_table
|
36
45
|
makes = AutomobileMake.arel_table
|
37
46
|
conditional_relation = makes[:name].eq(make_fleet_years[:make_name])
|