earth-ruby19 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. data/Gemfile +5 -0
  2. data/Gemfile.lock +138 -0
  3. data/LICENSE +20 -0
  4. data/README.markdown +38 -0
  5. data/lib/earth.rb +146 -0
  6. data/lib/earth/active_record_ext.rb +9 -0
  7. data/lib/earth/air.rb +13 -0
  8. data/lib/earth/air/aircraft.rb +21 -0
  9. data/lib/earth/air/aircraft/data_miner.rb +184 -0
  10. data/lib/earth/air/aircraft_class.rb +10 -0
  11. data/lib/earth/air/aircraft_class/data_miner.rb +42 -0
  12. data/lib/earth/air/aircraft_manufacturer.rb +9 -0
  13. data/lib/earth/air/aircraft_manufacturer/data_miner.rb +20 -0
  14. data/lib/earth/air/airline.rb +16 -0
  15. data/lib/earth/air/airline/data_miner.rb +57 -0
  16. data/lib/earth/air/airport.rb +44 -0
  17. data/lib/earth/air/airport/data_miner.rb +80 -0
  18. data/lib/earth/air/data_miner.rb +15 -0
  19. data/lib/earth/air/flight_configuration.rb +7 -0
  20. data/lib/earth/air/flight_configuration/data_miner.rb +16 -0
  21. data/lib/earth/air/flight_distance_class.rb +7 -0
  22. data/lib/earth/air/flight_distance_class/data_miner.rb +16 -0
  23. data/lib/earth/air/flight_domesticity.rb +6 -0
  24. data/lib/earth/air/flight_domesticity/data_miner.rb +57 -0
  25. data/lib/earth/air/flight_fuel_type.rb +12 -0
  26. data/lib/earth/air/flight_fuel_type/data_miner.rb +12 -0
  27. data/lib/earth/air/flight_propulsion.rb +7 -0
  28. data/lib/earth/air/flight_propulsion/data_miner.rb +16 -0
  29. data/lib/earth/air/flight_seat_class.rb +12 -0
  30. data/lib/earth/air/flight_seat_class/data_miner.rb +36 -0
  31. data/lib/earth/air/flight_segment.rb +29 -0
  32. data/lib/earth/air/flight_segment/data_miner.rb +330 -0
  33. data/lib/earth/air/flight_service.rb +7 -0
  34. data/lib/earth/air/flight_service/data_miner.rb +16 -0
  35. data/lib/earth/all.rb +11 -0
  36. data/lib/earth/automobile.rb +8 -0
  37. data/lib/earth/automobile/automobile_fuel_type.rb +18 -0
  38. data/lib/earth/automobile/automobile_fuel_type/data_miner.rb +45 -0
  39. data/lib/earth/automobile/automobile_make.rb +14 -0
  40. data/lib/earth/automobile/automobile_make/data_miner.rb +68 -0
  41. data/lib/earth/automobile/automobile_make_fleet_year.rb +15 -0
  42. data/lib/earth/automobile/automobile_make_fleet_year/data_miner.rb +29 -0
  43. data/lib/earth/automobile/automobile_make_year.rb +14 -0
  44. data/lib/earth/automobile/automobile_make_year/data_miner.rb +45 -0
  45. data/lib/earth/automobile/automobile_model.rb +14 -0
  46. data/lib/earth/automobile/automobile_model/data_miner.rb +38 -0
  47. data/lib/earth/automobile/automobile_model_year.rb +15 -0
  48. data/lib/earth/automobile/automobile_model_year/data_miner.rb +51 -0
  49. data/lib/earth/automobile/automobile_size_class.rb +14 -0
  50. data/lib/earth/automobile/automobile_size_class/data_miner.rb +43 -0
  51. data/lib/earth/automobile/automobile_variant.rb +17 -0
  52. data/lib/earth/automobile/automobile_variant/data_miner.rb +460 -0
  53. data/lib/earth/automobile/data_miner.rb +8 -0
  54. data/lib/earth/base.rb +7 -0
  55. data/lib/earth/bus.rb +1 -0
  56. data/lib/earth/bus/bus_class.rb +19 -0
  57. data/lib/earth/bus/bus_class/data_miner.rb +41 -0
  58. data/lib/earth/bus/data_miner.rb +1 -0
  59. data/lib/earth/conversions_ext.rb +45 -0
  60. data/lib/earth/data_miner.rb +10 -0
  61. data/lib/earth/diet.rb +2 -0
  62. data/lib/earth/diet/data_miner.rb +2 -0
  63. data/lib/earth/diet/diet_class.rb +15 -0
  64. data/lib/earth/diet/diet_class/data_miner.rb +36 -0
  65. data/lib/earth/diet/food_group.rb +17 -0
  66. data/lib/earth/diet/food_group/data_miner.rb +26 -0
  67. data/lib/earth/fuel.rb +2 -0
  68. data/lib/earth/fuel/data_miner.rb +2 -0
  69. data/lib/earth/fuel/fuel_price.rb +13 -0
  70. data/lib/earth/fuel/fuel_price/data_miner.rb +20 -0
  71. data/lib/earth/fuel/fuel_type.rb +18 -0
  72. data/lib/earth/fuel/fuel_type/data_miner.rb +37 -0
  73. data/lib/earth/hospitality.rb +1 -0
  74. data/lib/earth/hospitality/data_miner.rb +1 -0
  75. data/lib/earth/hospitality/lodging_class.rb +9 -0
  76. data/lib/earth/hospitality/lodging_class/data_miner.rb +30 -0
  77. data/lib/earth/industry.rb +10 -0
  78. data/lib/earth/industry/industry.rb +23 -0
  79. data/lib/earth/industry/industry_product.rb +22 -0
  80. data/lib/earth/industry/industry_product_line.rb +20 -0
  81. data/lib/earth/industry/industry_sector.rb +20 -0
  82. data/lib/earth/industry/merchant.rb +18 -0
  83. data/lib/earth/industry/merchant_category.rb +22 -0
  84. data/lib/earth/industry/merchant_category_industry.rb +20 -0
  85. data/lib/earth/industry/product_line.rb +22 -0
  86. data/lib/earth/industry/product_line_industry_product.rb +20 -0
  87. data/lib/earth/industry/sector.rb +19 -0
  88. data/lib/earth/inflectors.rb +9 -0
  89. data/lib/earth/locality.rb +10 -0
  90. data/lib/earth/locality/census_division.rb +22 -0
  91. data/lib/earth/locality/census_division/data_miner.rb +64 -0
  92. data/lib/earth/locality/census_region.rb +13 -0
  93. data/lib/earth/locality/census_region/data_miner.rb +17 -0
  94. data/lib/earth/locality/climate_division.rb +17 -0
  95. data/lib/earth/locality/climate_division/data_miner.rb +20 -0
  96. data/lib/earth/locality/country.rb +13 -0
  97. data/lib/earth/locality/country/data_miner.rb +19 -0
  98. data/lib/earth/locality/data_miner.rb +10 -0
  99. data/lib/earth/locality/egrid_region.rb +15 -0
  100. data/lib/earth/locality/egrid_region/data_miner.rb +35 -0
  101. data/lib/earth/locality/egrid_subregion.rb +16 -0
  102. data/lib/earth/locality/egrid_subregion/data_miner.rb +65 -0
  103. data/lib/earth/locality/petroleum_administration_for_defense_district.rb +13 -0
  104. data/lib/earth/locality/petroleum_administration_for_defense_district/data_miner.rb +21 -0
  105. data/lib/earth/locality/state.rb +22 -0
  106. data/lib/earth/locality/state/data_miner.rb +37 -0
  107. data/lib/earth/locality/urbanity.rb +10 -0
  108. data/lib/earth/locality/urbanity/data_miner.rb +15 -0
  109. data/lib/earth/locality/zip_code.rb +23 -0
  110. data/lib/earth/locality/zip_code/data_miner.rb +43 -0
  111. data/lib/earth/pet.rb +4 -0
  112. data/lib/earth/pet/breed.rb +15 -0
  113. data/lib/earth/pet/breed/data_miner.rb +25 -0
  114. data/lib/earth/pet/breed_gender.rb +14 -0
  115. data/lib/earth/pet/breed_gender/data_miner.rb +21 -0
  116. data/lib/earth/pet/data_miner.rb +4 -0
  117. data/lib/earth/pet/gender.rb +10 -0
  118. data/lib/earth/pet/gender/data_miner.rb +13 -0
  119. data/lib/earth/pet/species.rb +40 -0
  120. data/lib/earth/pet/species/data_miner.rb +42 -0
  121. data/lib/earth/rail.rb +1 -0
  122. data/lib/earth/rail/data_miner.rb +1 -0
  123. data/lib/earth/rail/rail_class.rb +16 -0
  124. data/lib/earth/rail/rail_class/data_miner.rb +36 -0
  125. data/lib/earth/residence.rb +8 -0
  126. data/lib/earth/residence/air_conditioner_use.rb +13 -0
  127. data/lib/earth/residence/air_conditioner_use/data_miner.rb +22 -0
  128. data/lib/earth/residence/clothes_machine_use.rb +10 -0
  129. data/lib/earth/residence/clothes_machine_use/data_miner.rb +28 -0
  130. data/lib/earth/residence/data_miner.rb +8 -0
  131. data/lib/earth/residence/dishwasher_use.rb +10 -0
  132. data/lib/earth/residence/dishwasher_use/data_miner.rb +28 -0
  133. data/lib/earth/residence/residence_appliance.rb +16 -0
  134. data/lib/earth/residence/residence_appliance/data_miner.rb +20 -0
  135. data/lib/earth/residence/residence_class.rb +16 -0
  136. data/lib/earth/residence/residence_class/data_miner.rb +15 -0
  137. data/lib/earth/residence/residence_fuel_price.rb +18 -0
  138. data/lib/earth/residence/residence_fuel_price/data_miner.rb +200 -0
  139. data/lib/earth/residence/residence_fuel_type.rb +32 -0
  140. data/lib/earth/residence/residence_fuel_type/data_miner.rb +18 -0
  141. data/lib/earth/residence/residential_energy_consumption_survey_response.rb +39 -0
  142. data/lib/earth/residence/residential_energy_consumption_survey_response/data_miner.rb +283 -0
  143. data/spec/lib/earth_spec.rb +25 -0
  144. data/spec/spec_helper.rb +11 -0
  145. data/vendor/geokit-rails/CHANGELOG.rdoc +46 -0
  146. data/vendor/geokit-rails/MIT-LICENSE +20 -0
  147. data/vendor/geokit-rails/README.markdown +561 -0
  148. data/vendor/geokit-rails/Rakefile +18 -0
  149. data/vendor/geokit-rails/about.yml +9 -0
  150. data/vendor/geokit-rails/assets/api_keys_template +61 -0
  151. data/vendor/geokit-rails/init.rb +1 -0
  152. data/vendor/geokit-rails/install.rb +14 -0
  153. data/vendor/geokit-rails/lib/geokit-rails.rb +24 -0
  154. data/vendor/geokit-rails/lib/geokit-rails/acts_as_mappable.rb +456 -0
  155. data/vendor/geokit-rails/lib/geokit-rails/adapters/abstract.rb +31 -0
  156. data/vendor/geokit-rails/lib/geokit-rails/adapters/mysql.rb +22 -0
  157. data/vendor/geokit-rails/lib/geokit-rails/adapters/postgresql.rb +22 -0
  158. data/vendor/geokit-rails/lib/geokit-rails/adapters/sqlserver.rb +43 -0
  159. data/vendor/geokit-rails/lib/geokit-rails/defaults.rb +22 -0
  160. data/vendor/geokit-rails/lib/geokit-rails/geocoder_control.rb +16 -0
  161. data/vendor/geokit-rails/lib/geokit-rails/ip_geocode_lookup.rb +46 -0
  162. data/vendor/geokit-rails/test/acts_as_mappable_test.rb +474 -0
  163. data/vendor/geokit-rails/test/boot.rb +25 -0
  164. data/vendor/geokit-rails/test/database.yml +20 -0
  165. data/vendor/geokit-rails/test/fixtures/companies.yml +7 -0
  166. data/vendor/geokit-rails/test/fixtures/custom_locations.yml +54 -0
  167. data/vendor/geokit-rails/test/fixtures/locations.yml +54 -0
  168. data/vendor/geokit-rails/test/fixtures/mock_addresses.yml +17 -0
  169. data/vendor/geokit-rails/test/fixtures/mock_families.yml +2 -0
  170. data/vendor/geokit-rails/test/fixtures/mock_houses.yml +9 -0
  171. data/vendor/geokit-rails/test/fixtures/mock_organizations.yml +5 -0
  172. data/vendor/geokit-rails/test/fixtures/mock_people.yml +5 -0
  173. data/vendor/geokit-rails/test/fixtures/stores.yml +0 -0
  174. data/vendor/geokit-rails/test/ip_geocode_lookup_test.rb +77 -0
  175. data/vendor/geokit-rails/test/models/company.rb +3 -0
  176. data/vendor/geokit-rails/test/models/custom_location.rb +12 -0
  177. data/vendor/geokit-rails/test/models/location.rb +4 -0
  178. data/vendor/geokit-rails/test/models/mock_address.rb +4 -0
  179. data/vendor/geokit-rails/test/models/mock_family.rb +3 -0
  180. data/vendor/geokit-rails/test/models/mock_house.rb +3 -0
  181. data/vendor/geokit-rails/test/models/mock_organization.rb +4 -0
  182. data/vendor/geokit-rails/test/models/mock_person.rb +4 -0
  183. data/vendor/geokit-rails/test/models/store.rb +3 -0
  184. data/vendor/geokit-rails/test/schema.rb +60 -0
  185. data/vendor/geokit-rails/test/tasks.rake +31 -0
  186. data/vendor/geokit-rails/test/test_helper.rb +23 -0
  187. metadata +476 -0
@@ -0,0 +1,10 @@
1
# A class of aircraft, keyed by its Brighter Planet aircraft class code
# instead of an integer id.
class AircraftClass < ActiveRecord::Base
  set_primary_key(:brighter_planet_aircraft_class_code)

  # Aircraft rows point back at their class through the same code column.
  has_many(:aircraft, :foreign_key => 'brighter_planet_aircraft_class_code')
  # has_many :airline_aircraft_seat_classes, :through => :aircraft

  # Registers a tap step against the server configured in Earth.taps_server.
  data_miner do
    tap("Brighter Planet's aircraft class data", Earth.taps_server)
  end
end
@@ -0,0 +1,42 @@
1
# Data mining steps for AircraftClass: declare the table, import the list of
# classes, then fill in per-class averages derived from aircraft and from
# flight segments.
AircraftClass.class_eval do
  data_miner do
    schema Earth.database_options do
      string  'brighter_planet_aircraft_class_code'
      string  'name'
      float   'm1'
      float   'm2'
      float   'm3'
      float   'endpoint_fuel'
      integer 'seats'
    end

    import "Brighter Planet's aircraft classes", :url => 'http://static.brighterplanet.com/science/data/transport/air/brighter_planet_aircraft_classes.csv' do
      key   'brighter_planet_aircraft_class_code'
      store 'name', :field_name => 'description'
    end

    process "Derive some average aircraft characteristics from aircraft" do
      Aircraft.run_data_miner!
      aircraft = Aircraft.arel_table
      aircraft_classes = AircraftClass.arel_table
      # Correlates each averaging subquery with the aircraft_classes row that
      # update_all is currently writing.
      conditional_relation = aircraft_classes[:brighter_planet_aircraft_class_code].eq(aircraft[:brighter_planet_aircraft_class_code])
      %w{ m1 m2 m3 endpoint_fuel }.each do |column|
        relation = Aircraft.weighted_average_relation(column).
          where(conditional_relation)
        update_all "#{column} = (#{relation.to_sql})"
      end
    end

    process "Derive some average aircraft characteristics from flight segments" do # FIXME TODO why not derive this from aircraft?
      FlightSegment.run_data_miner!
      aircraft = Aircraft.arel_table
      aircraft_classes = AircraftClass.arel_table
      # The subquery joins flight segments to aircraft and is correlated with
      # the outer aircraft_classes row through the aircraft class code.
      relation = FlightSegment.joins(:aircraft). # this requires associations
        weighted_average_relation(:seats, :weighted_by => :passengers).
        where(aircraft_classes[:brighter_planet_aircraft_class_code].eq(aircraft[:brighter_planet_aircraft_class_code]))
      update_all "seats = (#{relation.to_sql})"
    end
  end
end
42
+
@@ -0,0 +1,9 @@
1
# An aircraft manufacturer; the manufacturer's name doubles as primary key.
class AircraftManufacturer < ActiveRecord::Base
  set_primary_key(:name)

  # Aircraft reference their manufacturer through manufacturer_name.
  has_many(:aircraft, :foreign_key => 'manufacturer_name')

  # Registers a tap step against the server configured in Earth.taps_server.
  data_miner do
    tap("Brighter Planet's aircraft manufacturer data", Earth.taps_server)
  end
end
@@ -0,0 +1,20 @@
1
# Data mining steps for AircraftManufacturer: rebuild the table from scratch
# and repopulate it with the manufacturer names found on aircraft.
AircraftManufacturer.class_eval do
  data_miner do
    process "Start from scratch" do
      # Only drop a table that is actually there: on a fresh database the
      # unconditional drop raises and aborts the run before the schema step
      # below gets a chance to create the table.
      connection.drop_table table_name if connection.table_exists?(table_name)
    end

    schema Earth.database_options do
      string 'name'
    end

    process "Derive a list of aircraft manufacturers from aircraft" do
      Aircraft.run_data_miner!
      # INSERT IGNORE skips rows that would collide with an existing key, so
      # each manufacturer name is stored once. Blank names are filtered out.
      connection.execute %{
        INSERT IGNORE INTO aircraft_manufacturers(name)
        SELECT aircraft.manufacturer_name FROM aircraft WHERE LENGTH(aircraft.manufacturer_name) > 0
      }
    end
  end
end
20
+
@@ -0,0 +1,16 @@
1
# An airline, keyed by its IATA code.
class Airline < ActiveRecord::Base
  set_primary_key(:iata_code)

  # has_many :airline_aircraft, :class_name => 'AirlineAircraft'
  # has_many :seat_classes, :class_name => 'AirlineSeatClass'
  has_many(:segments, :class_name => "FlightSegment", :foreign_key => 'airline_iata_code')
  # has_many :airline_aircraft_seat_classes, :class_name => 'AirlineAircraftSeatClass'

  # Registers a tap step against the server configured in Earth.taps_server.
  data_miner do
    tap("Brighter Planet's sanitized airlines data", Earth.taps_server)
  end

  # True unless the airline is flagged as international.
  def all_flights_domestic?
    international? ? false : true
  end
end
@@ -0,0 +1,57 @@
1
# Data mining steps for Airline: declare the table, import the airline lookup
# table (corrected by an errata file), then derive the international flag and
# passenger-weighted averages from flight segments.
Airline.class_eval do
  # Responder handed to Errata. Each predicate receives an import row and
  # reports whether its 'Code' differs from one specific airline id.
  class Airline::Guru
    # needed by errata

    # 19155 Coral Air Inc.: COR
    def is_not_coral_air?(row)
      row['Code'].to_i != 19155
    end

    # 19452 Aviacion Y Comercio S.A.: AO
    def is_not_aviacion_y_comercio?(row)
      row['Code'].to_i != 19452
    end

    # 19543 Air China: CA
    def is_not_air_china?(row)
      row['Code'].to_i != 19543
    end

    # 19570 South African Airways: SA
    def is_not_south_african_airways?(row)
      row['Code'].to_i != 19570
    end

    # 19704 Continental Air Lines Inc.: CO
    def is_not_continental_airlines?(row)
      row['Code'].to_i != 19704
    end

    # 19740 Sallee's Aviation: SAL
    def is_not_sallee_s_aviation?(row)
      row['Code'].to_i != 19740
    end

    # "21361","Air Berlin PLC and CO: AB"
    def is_not_air_berlin?(row)
      row['Code'].to_i != 21361
    end
  end

  data_miner do
    schema(Earth.database_options) do
      string  'iata_code'
      string  'name'
      string  'dot_airline_id_code'
      boolean 'international'
      float   'seats'
      float   'distance'
      string  'distance_units'
      float   'load_factor'
      float   'freight_share'
      float   'payload'
      string  'payload_units'
    end

    airline_errata = Errata.new(:url => 'http://static.brighterplanet.com/science/data/transport/air/airlines/errata.csv',
                                :responder => Airline::Guru.new)

    # The 'Description' field is split on ':'; piece 0 is stored as the name
    # and piece 1 as the IATA code.
    import "the T100 AIRLINE_ID lookup table, which also includes IATA codes",
           :url => 'http://www.transtats.bts.gov/Download_Lookup.asp?Lookup=L_AIRLINE_ID',
           :errata => airline_errata do
      key   'iata_code', :field_name => 'Description', :split => { :pattern => /:/, :keep => 1 }
      store 'dot_airline_id_code', :field_name => 'Code'
      store 'name', :field_name => 'Description', :split => { :pattern => /:/, :keep => 0 }
    end

    process "Determine whether airlines fly internationally by looking at flight segments" do
      FlightSegment.run_data_miner!
      # Matches airlines with at least one segment whose origin and
      # destination countries are both known and differ.
      flies_abroad = '(SELECT COUNT(*) FROM flight_segments WHERE flight_segments.airline_iata_code = airlines.iata_code AND flight_segments.origin_country_iso_3166_code != flight_segments.dest_country_iso_3166_code AND flight_segments.origin_country_iso_3166_code IS NOT NULL AND flight_segments.dest_country_iso_3166_code IS NOT NULL) > 0'
      update_all('international = 1', flies_abroad)
    end

    process "Derive some average flight characteristics from flight segments" do
      FlightSegment.run_data_miner!
      airlines = Airline.arel_table
      segments = FlightSegment.arel_table

      # Correlates each averaging subquery with the airline row being updated.
      conditional_relation = airlines[:iata_code].eq(segments[:airline_iata_code])

      # Column to average, plus any options beyond the passenger weighting.
      averaged_columns = [
        [:seats,         {}],
        [:distance,      {}],
        [:load_factor,   {}],
        [:freight_share, {}],
        [:payload,       { :disaggregate_by => :departures_performed }]
      ]

      averaged_columns.each do |column, extra_options|
        options = { :weighted_by => :passengers }.merge(extra_options)
        average = FlightSegment.weighted_average_relation(column, options).where(conditional_relation)
        update_all "#{column} = (#{average.to_sql})"
      end
    end
  end
end
57
+
@@ -0,0 +1,44 @@
1
# An airport, keyed by its IATA code and mappable through its
# latitude/longitude columns.
class Airport < ActiveRecord::Base
  set_primary_key :iata_code

  # --------------------------------
  # virtual has_many association
  # has_many :segments won't work because there's no general way to specify the correct conditions
  # even if you get clever with it, like
  #   has_many :segments,
  #            :class_name => 'FlightSegment',
  #            :foreign_key => <origin column>,
  #            :conditions => '<destination column> = #{id}'
  # you get a query that joins the two tests with AND where an OR is needed,
  # and you can't just do finder_sql, because that breaks any other :select
  #
  # Flight segments identify airports by IATA code in the columns
  # origin_airport_iata_code and dest_airport_iata_code (the same columns the
  # Airport data miner queries), so the scope matches on those.
  def segments
    FlightSegment.scoped :conditions => ['origin_airport_iata_code = ? OR dest_airport_iata_code = ?', iata_code, iata_code]
  end
  # --------------------------------

  belongs_to :country, :foreign_key => 'country_iso_3166_code'
  # NOTE(review): :nms is presumably geokit's nautical-miles unit - confirm.
  acts_as_mappable :default_units => :nms,
                   :lat_column_name => :latitude,
                   :lng_column_name => :longitude

  data_miner do
    tap "Brighter Planet's sanitized airports data", Earth.taps_server

    process "pull dependencies" do
      run_data_miner_on_belongs_to_associations
    end
  end

  # True unless the airport is flagged as an international origin.
  def all_flights_from_here_domestic?
    !international_origin?
  end

  # True unless the airport is flagged as an international destination.
  def all_flights_to_here_domestic?
    !international_destination?
  end

  # True when the airport's country is Country.united_states.
  def united_states?
    country == Country.united_states
  end
end
@@ -0,0 +1,80 @@
1
# Data mining steps for Airport: declare the table, import the OpenFlights
# airport list, then derive international flags and passenger-weighted
# averages from flight segments.
Airport.class_eval do
  data_miner do
    schema Earth.database_options do
      string  'iata_code'
      string  'name'
      string  'city'
      string  'country_name'
      string  'country_iso_3166_code'
      float   'latitude'
      float   'longitude'
      float   'seats'
      float   'distance'
      string  'distance_units'
      float   'load_factor'
      float   'freight_share'
      float   'payload'
      string  'payload_units'
      boolean 'international_origin'
      boolean 'international_destination'
    end

    # The file has no header row, so columns are addressed by number; rows
    # without a value in field 4 (the IATA code) are skipped.
    import "the OpenFlights.org airports database",
           :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat',
           :headers => false,
           :select => lambda { |row| row[4].present? } do
      key   'iata_code', :field_number => 4
      store 'name', :field_number => 1
      store 'city', :field_number => 2
      store 'country_name', :field_number => 3
      store 'country_iso_3166_code', :field_number => 3, :upcase => true, :dictionary => { :input => 'name', :output => 'iso_3166_code', :url => 'http://data.brighterplanet.com/countries.csv' }
      store 'latitude', :field_number => 6
      store 'longitude', :field_number => 7
    end

    # step.await :other_class => FlightSegment do |deferred|
    #   deferred.derive :country # this uses a heuristic that depends on flight segments
    #   class << self
    #     def derive_country
    #       update_all('country_id = (SELECT flight_segments.origin_country_id FROM flight_segments WHERE flight_segments.origin_airport_id = airports.id AND flight_segments.origin_country_id IS NOT NULL LIMIT 1)', 'airports.country_id IS NULL')
    #       update_all('country_id = (SELECT flight_segments.destination_country_id FROM flight_segments WHERE flight_segments.destination_airport_id = airports.id AND flight_segments.destination_country_id IS NOT NULL LIMIT 1)', 'airports.country_id IS NULL')
    #       Country.all.each do |c|
    #         next if c.name.blank?
    #         update_all("country_id = #{c.id}", ["airports.country_id IS NULL AND airports.country_name LIKE ?", "%#{c.name.upcase}%"])
    #       end
    #       Airport.all(:conditions => 'country_id IS NULL AND country_name IS NOT NULL').each do |a|
    #         c = Country.find(:first, :conditions => ["name like ?", "%#{a.country_name}%"])
    #         a.update_attributes(:country_id => c.id) if c
    #       end
    #     end
    #   end

    process "Determine whether each airport serves international flights" do
      FlightSegment.run_data_miner!
      # An airport is flagged when at least one segment touching it has known,
      # differing origin and destination countries.
      update_all 'international_origin = 1', '(SELECT COUNT(*) FROM flight_segments WHERE flight_segments.origin_airport_iata_code = airports.iata_code AND flight_segments.origin_country_iso_3166_code != flight_segments.dest_country_iso_3166_code AND flight_segments.origin_country_iso_3166_code IS NOT NULL AND flight_segments.dest_country_iso_3166_code IS NOT NULL LIMIT 1) > 0'
      update_all 'international_destination = 1', '(SELECT COUNT(*) FROM flight_segments WHERE flight_segments.dest_airport_iata_code = airports.iata_code AND flight_segments.origin_country_iso_3166_code != flight_segments.dest_country_iso_3166_code AND flight_segments.origin_country_iso_3166_code IS NOT NULL AND flight_segments.dest_country_iso_3166_code IS NOT NULL LIMIT 1) > 0'
    end

    # sabshere 5/24/10 using temporary tables because the WHERE clause has a very slow OR condition: iata_code = dest_iata_code OR iata_code = origin_iata_code
    process "Derive some average flight characteristics from flight segments" do
      FlightSegment.run_data_miner!
      segments = FlightSegment.arel_table
      airports = Airport.arel_table

      find_in_batches do |batch|
        batch.each do |airport|
          targeting_relation = airports[:iata_code].eq airport.iata_code
          conditional_relation = segments[:origin_airport_iata_code].eq(airport.iata_code).or(segments[:dest_airport_iata_code].eq(airport.iata_code))
          # Copy this airport's segments into tmp1; the averaging queries are
          # then pointed at tmp1 by rewriting the table name in their SQL.
          connection.execute "CREATE TEMPORARY TABLE tmp1 #{FlightSegment.where(conditional_relation).to_sql}"
          begin
            update_all "seats = (#{FlightSegment.weighted_average_relation(:seats, :weighted_by => :passengers ).to_sql.gsub('flight_segments', 'tmp1')})", targeting_relation.to_sql
            update_all "distance = (#{FlightSegment.weighted_average_relation(:distance, :weighted_by => :passengers ).to_sql.gsub('flight_segments', 'tmp1')})", targeting_relation.to_sql
            update_all "load_factor = (#{FlightSegment.weighted_average_relation(:load_factor, :weighted_by => :passengers ).to_sql.gsub('flight_segments', 'tmp1')})", targeting_relation.to_sql
            update_all "freight_share = (#{FlightSegment.weighted_average_relation(:freight_share, :weighted_by => :passengers ).to_sql.gsub('flight_segments', 'tmp1')})", targeting_relation.to_sql
            update_all "payload = (#{FlightSegment.weighted_average_relation(:payload, :weighted_by => :passengers, :disaggregate_by => :departures_performed).to_sql.gsub('flight_segments', 'tmp1')})", targeting_relation.to_sql
          ensure
            # Always drop tmp1, even when an update fails; otherwise the next
            # CREATE TEMPORARY TABLE on this connection would fail because
            # tmp1 already exists.
            connection.execute 'DROP TABLE tmp1'
          end
        end
      end
    end
  end
end
80
+
@@ -0,0 +1,15 @@
1
+ require 'earth/locality'
2
+ require 'earth/locality/data_miner'
3
+ require 'earth/air/aircraft/data_miner'
4
+ require 'earth/air/aircraft_class/data_miner'
5
+ require 'earth/air/aircraft_manufacturer/data_miner'
6
+ require 'earth/air/airline/data_miner'
7
+ require 'earth/air/airport/data_miner'
8
+ require 'earth/air/flight_configuration/data_miner'
9
+ require 'earth/air/flight_distance_class/data_miner'
10
+ require 'earth/air/flight_domesticity/data_miner'
11
+ require 'earth/air/flight_fuel_type/data_miner'
12
+ require 'earth/air/flight_propulsion/data_miner'
13
+ require 'earth/air/flight_seat_class/data_miner'
14
+ require 'earth/air/flight_segment/data_miner'
15
+ require 'earth/air/flight_service/data_miner'
@@ -0,0 +1,7 @@
1
# A flight configuration, identified by its name (the primary key).
class FlightConfiguration < ActiveRecord::Base
  set_primary_key(:name)

  # Registers a tap step against the server configured in Earth.taps_server.
  data_miner do
    tap("Brighter Planet's flight configuration data", Earth.taps_server)
  end
end
@@ -0,0 +1,16 @@
1
# Data mining steps for FlightConfiguration: declare the table, then populate
# it with the configurations that appear on flight segments.
FlightConfiguration.class_eval do
  data_miner do
    # Pass Earth.database_options like the other air data miners do, so this
    # table is created with the same options as the flight_segments table it
    # is populated from.
    schema Earth.database_options do
      string 'name'
      string 'bts_aircraft_configuration_code'
    end

    process "derive from flight segments" do
      FlightSegment.run_data_miner!
      # INSERT IGNORE skips rows that would collide with an existing key.
      # Segments with a blank configuration_id are filtered out.
      connection.execute %{
        INSERT IGNORE INTO flight_configurations(name, bts_aircraft_configuration_code)
        SELECT flight_segments.configuration_id, flight_segments.bts_aircraft_configuration_code FROM flight_segments WHERE LENGTH(flight_segments.configuration_id) > 0
      }
    end
  end
end
@@ -0,0 +1,7 @@
1
# A flight distance class, identified by its name (the primary key).
class FlightDistanceClass < ActiveRecord::Base
  set_primary_key(:name)

  # Registers a tap step against the server configured in Earth.taps_server.
  data_miner do
    tap("Brighter Planet's sanitized distance class data", Earth.taps_server)
  end
end
@@ -0,0 +1,16 @@
1
# Data mining steps for FlightDistanceClass: declare the table and import the
# list of distance classes.
FlightDistanceClass.class_eval do
  data_miner do
    schema(Earth.database_options) do
      string('name')
      float('distance')
      string('distance_units')
    end

    distance_classes_url = 'http://static.brighterplanet.com/science/data/transport/air/distance_classes/distance_classes.csv'

    import("a list of Brighter Planet-defined distance classes", :url => distance_classes_url) do
      key('name')
      # The source row names its own units in the 'units' field; kilometres
      # is requested as the target unit.
      store('distance', :units_field_name => 'units', :to_units => :kilometres)
    end
  end
end
16
+
@@ -0,0 +1,6 @@
1
# A flight domesticity, identified by its name (the primary key).
class FlightDomesticity < ActiveRecord::Base
  set_primary_key(:name)
  # Registers a tap step against the server configured in Earth.taps_server.
  data_miner do
    tap("Brighter Planet's flight domesticity info", Earth.taps_server)
  end
end
@@ -0,0 +1,57 @@
1
# Data mining steps for FlightDomesticity: rebuild the table from scratch,
# populate it from flight segments, then derive passenger-weighted averages
# for each domesticity.
FlightDomesticity.class_eval do
  data_miner do
    process "Start from scratch" do
      # Only drop a table that is actually there: on a fresh database the
      # unconditional drop raises and aborts the run before the schema step
      # below gets a chance to create the table.
      connection.drop_table table_name if connection.table_exists?(table_name)
    end

    schema Earth.database_options do
      string 'name'
      string 'bts_data_source_code'
      float  'distance'
      string 'distance_units'
      float  'freight_share'
      float  'load_factor'
      float  'seats'
      float  'payload'
      string 'payload_units'
    end

    process "Derive flight domesticities from flight segments" do # FIXME TODO might make more sense to combine foreign and domestic carriers -> domestic flight (all carriers), international flight (all carriers)
      FlightSegment.run_data_miner!
      # INSERT IGNORE skips rows that would collide with an existing key.
      # Segments with a blank domesticity_id are filtered out.
      connection.execute %{
        INSERT IGNORE INTO flight_domesticities(name, bts_data_source_code)
        SELECT flight_segments.domesticity_id, flight_segments.bts_data_source_code FROM flight_segments WHERE LENGTH(flight_segments.domesticity_id) > 0
      }
    end

    process "Derive average flight characteristics from flight segments" do
      FlightSegment.run_data_miner!
      segments = FlightSegment.arel_table
      flight_domesticities = FlightDomesticity.arel_table
      ## slow, all-in-one method
      # conditional_relation = flight_domesticities[:name].eq(segments[:domesticity_id])
      # update_all "seats = (#{FlightSegment.weighted_average_relation(:seats, :weighted_by => :passengers ).where(conditional_relation).to_sql})"
      # update_all "distance = (#{FlightSegment.weighted_average_relation(:distance, :weighted_by => :passengers ).where(conditional_relation).to_sql})"
      # update_all "load_factor = (#{FlightSegment.weighted_average_relation(:load_factor, :weighted_by => :passengers ).where(conditional_relation).to_sql})"
      # update_all "freight_share = (#{FlightSegment.weighted_average_relation(:freight_share, :weighted_by => :passengers ).where(conditional_relation).to_sql})"
      # update_all "payload = (#{FlightSegment.weighted_average_relation(:payload, :weighted_by => :passengers, :disaggregate_by => :departures_performed).where(conditional_relation).to_sql})"
      ## fast method using temp tables
      find_in_batches do |batch|
        batch.each do |flight_domesticity|
          targeting_relation = flight_domesticities[:name].eq flight_domesticity.name
          conditional_relation = segments[:domesticity_id].eq flight_domesticity.name
          # Copy this domesticity's segments into tmp1; the averaging queries
          # are then pointed at tmp1 by rewriting the table name in their SQL.
          connection.execute "CREATE TEMPORARY TABLE tmp1 #{FlightSegment.where(conditional_relation).to_sql}"
          begin
            update_all "seats = (#{FlightSegment.weighted_average_relation(:seats, :weighted_by => :passengers ).to_sql.gsub('flight_segments', 'tmp1')})", targeting_relation.to_sql
            update_all "distance = (#{FlightSegment.weighted_average_relation(:distance, :weighted_by => :passengers ).to_sql.gsub('flight_segments', 'tmp1')})", targeting_relation.to_sql
            update_all "load_factor = (#{FlightSegment.weighted_average_relation(:load_factor, :weighted_by => :passengers ).to_sql.gsub('flight_segments', 'tmp1')})", targeting_relation.to_sql
            update_all "freight_share = (#{FlightSegment.weighted_average_relation(:freight_share, :weighted_by => :passengers ).to_sql.gsub('flight_segments', 'tmp1')})", targeting_relation.to_sql
            update_all "payload = (#{FlightSegment.weighted_average_relation(:payload, :weighted_by => :passengers, :disaggregate_by => :departures_performed).to_sql.gsub('flight_segments', 'tmp1')})", targeting_relation.to_sql
          ensure
            # Always drop tmp1, even when an update fails; otherwise the next
            # CREATE TEMPORARY TABLE on this connection would fail because
            # tmp1 already exists.
            connection.execute 'DROP TABLE tmp1'
          end
        end
      end
      update_all "distance_units = 'kilometres'"
      update_all "payload_units = 'kilograms'"
    end
  end
end
57
+
@@ -0,0 +1,12 @@
1
# A flight fuel type. Only fallback values are defined here.
class FlightFuelType < ActiveRecord::Base
  # this fallback is jet fuel
  falls_back_on(
    # The 21.09 figure is in pounds CO2/gallon fuel
    # (http://www.eia.doe.gov/oiaf/1605/excel/Fuel%20Emission%20Factors.xls);
    # the expression converts the pounds to kilograms and the gallon to litres.
    :emission_factor => (21.09.pounds.to(:kilograms) / 1.gallons.to(:litres)),
    :radiative_forcing_index => 2, # from Matt
    :density => 3.057 # kg / gal
  )

  data_miner do
    tap("Brighter Planet's sanitized flight fuel type data", Earth.taps_server)

    # we just always use the fallback
  end
end
@@ -0,0 +1,12 @@
1
# Data mining steps for FlightFuelType: only the table is declared; no rows
# are imported here.
FlightFuelType.class_eval do
  data_miner do
    # Pass Earth.database_options like the other air data miners do, so this
    # table is created with the same options as its sibling tables.
    schema Earth.database_options do
      string 'name'
      float  'emission_factor'
      float  'radiative_forcing_index'
      float  'density'
    end

    # we just always use the fallback
  end
end