data_miner 0.4.4 → 0.4.5

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -10,7 +10,7 @@ begin
10
10
  gem.email = "seamus@abshere.net"
11
11
  gem.homepage = "http://github.com/seamusabshere/data_miner"
12
12
  gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
13
- gem.add_dependency 'remote_table', '~>0.2.5'
13
+ gem.add_dependency 'remote_table', '~>0.2.6'
14
14
  gem.add_dependency 'activerecord', '~>2.3.4'
15
15
  gem.add_dependency 'activesupport', '~>2.3.4'
16
16
  gem.add_dependency 'andand', '~>1.3.1'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.4
1
+ 0.4.5
data/data_miner.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.4"
8
+ s.version = "0.4.5"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2010-03-24}
12
+ s.date = %q{2010-03-26}
13
13
  s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -51,7 +51,7 @@ Gem::Specification.new do |s|
51
51
  s.specification_version = 3
52
52
 
53
53
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
54
- s.add_runtime_dependency(%q<remote_table>, ["~> 0.2.5"])
54
+ s.add_runtime_dependency(%q<remote_table>, ["~> 0.2.6"])
55
55
  s.add_runtime_dependency(%q<activerecord>, ["~> 2.3.4"])
56
56
  s.add_runtime_dependency(%q<activesupport>, ["~> 2.3.4"])
57
57
  s.add_runtime_dependency(%q<andand>, ["~> 1.3.1"])
@@ -60,7 +60,7 @@ Gem::Specification.new do |s|
60
60
  s.add_runtime_dependency(%q<blockenspiel>, ["~> 0.3.2"])
61
61
  s.add_runtime_dependency(%q<log4r>, ["~> 1.1.7"])
62
62
  else
63
- s.add_dependency(%q<remote_table>, ["~> 0.2.5"])
63
+ s.add_dependency(%q<remote_table>, ["~> 0.2.6"])
64
64
  s.add_dependency(%q<activerecord>, ["~> 2.3.4"])
65
65
  s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
66
66
  s.add_dependency(%q<andand>, ["~> 1.3.1"])
@@ -70,7 +70,7 @@ Gem::Specification.new do |s|
70
70
  s.add_dependency(%q<log4r>, ["~> 1.1.7"])
71
71
  end
72
72
  else
73
- s.add_dependency(%q<remote_table>, ["~> 0.2.5"])
73
+ s.add_dependency(%q<remote_table>, ["~> 0.2.6"])
74
74
  s.add_dependency(%q<activerecord>, ["~> 2.3.4"])
75
75
  s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
76
76
  s.add_dependency(%q<andand>, ["~> 1.3.1"])
@@ -83,7 +83,7 @@ module DataMiner
83
83
  end
84
84
 
85
85
  def do_convert(row, value)
86
- logger.error "[data_miner gem] If you use :from_units, you need to set :to_units (#{resource.name}##{name})" unless wants_units?
86
+ DataMiner.log_or_raise "If you use :from_units, you need to set :to_units (#{resource.name}##{name})" unless wants_units?
87
87
  value.to_f.convert((from_units || unit_from_source(row)), to_units)
88
88
  end
89
89
 
@@ -72,11 +72,11 @@ module DataMiner
72
72
  import_runnables.each do |runnable|
73
73
  runnable.attributes.each do |_, attribute|
74
74
  if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
75
- DataMiner.logger.error %{
75
+ DataMiner.log_or_raise %{
76
76
 
77
77
  ================================
78
78
 
79
- [data_miner gem] You don't have a valid unit definition for #{resource.name}##{attribute.name}.
79
+ You don't have a valid unit definition for #{resource.name}##{attribute.name}.
80
80
 
81
81
  You supplied #{attribute.options.keys.select { |k, _| k.to_s =~ /unit/ }.map(&:to_sym).inspect }.
82
82
 
@@ -93,7 +93,7 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
93
93
  missing_columns = Array.new
94
94
  import_runnables.each do |runnable|
95
95
  runnable.attributes.each do |_, attribute|
96
- DataMiner.logger.error "[data_miner gem] You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
96
+ DataMiner.log_or_raise "You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
97
97
  unless resource.column_names.include? attribute.name
98
98
  missing_columns << attribute.name
99
99
  end
@@ -104,11 +104,11 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
104
104
  end
105
105
  missing_columns.uniq!
106
106
  if missing_columns.any?
107
- DataMiner.logger.error %{
107
+ DataMiner.log_or_raise %{
108
108
 
109
109
  ================================
110
110
 
111
- [data_miner gem] On #{resource}, it looks like you're missing some columns...
111
+ On #{resource}, it looks like you're missing some columns...
112
112
 
113
113
  Please run this...
114
114
 
@@ -16,7 +16,7 @@ module DataMiner
16
16
  @position_in_run = position_in_run
17
17
  @description = description
18
18
  @errata = Errata.new(:url => options[:errata], :klass => resource) if options[:errata]
19
- @table = RemoteTable.new(options.slice(:url, :filename, :post_data, :format, :skip, :cut, :schema, :schema_name, :trap, :select, :reject, :sheet, :delimiter, :headers, :transform, :crop, :encoding))
19
+ @table = RemoteTable.new(options.slice(:url, :filename, :form_data, :format, :skip, :cut, :schema, :schema_name, :trap, :select, :reject, :sheet, :delimiter, :headers, :transform, :crop, :encoding, :compression, :glob))
20
20
  end
21
21
 
22
22
  def inspect
@@ -28,12 +28,12 @@ module DataMiner
28
28
  end
29
29
 
30
30
  def store(attr_name, attr_options = {})
31
- DataMiner.logger.error "[data_miner gem] You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
31
+ DataMiner.log_or_raise "You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
32
32
  attributes[attr_name] = Attribute.new self, attr_name, attr_options
33
33
  end
34
34
 
35
35
  def key(attr_name, attr_options = {})
36
- DataMiner.logger.error "[data_miner gem] You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
36
+ DataMiner.log_or_raise "You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
37
37
  @key = attr_name
38
38
  store attr_name, attr_options
39
39
  end
data/lib/data_miner.rb CHANGED
@@ -32,6 +32,17 @@ module DataMiner
32
32
 
33
33
  self.logger = Logger.new 'data_miner'
34
34
  logger.add info_outputter, error_outputter
35
+ ActiveRecord::Base.logger = logger
36
+ ActiveRecord::Base.colorize_logging = false
37
+ end
38
+ end
39
+
40
+ def self.log_or_raise(message)
41
+ message = "[data_miner gem] #{message}"
42
+ if ENV['RAILS_ENV'] == 'production'
43
+ logger.error message
44
+ else
45
+ raise message
35
46
  end
36
47
  end
37
48
 
@@ -51,7 +62,7 @@ end
51
62
  ActiveRecord::Base.class_eval do
52
63
  def self.data_miner(&block)
53
64
  unless table_exists?
54
- logger.error "[DataMiner gem] Database table `#{table_name}` doesn't exist. DataMiner probably won't work properly until you run a migration or otherwise fix the schema."
65
+ DataMiner.log_or_raise "Database table `#{table_name}` doesn't exist. DataMiner probably won't work properly until you run a migration or otherwise fix the schema."
55
66
  return
56
67
  end
57
68
 
@@ -481,7 +481,7 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
481
481
 
482
482
  # conversions are NOT performed here, since we first have to zero out legitimate skips
483
483
  # otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
484
- import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv', :headers => :upcase do
484
+ import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv' do
485
485
  key 'department_of_energy_identifier', :field_name => 'DOEID'
486
486
 
487
487
  store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
@@ -636,6 +636,252 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
636
636
  end
637
637
  end
638
638
 
639
+ # T-100 Segment (All Carriers): http://www.transtats.bts.gov/Fields.asp?Table_ID=293
640
+ class T100FlightSegment < ActiveRecord::Base
641
+ set_primary_key :row_hash
642
+ URL = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
643
+ FORM_DATA = %{
644
+ UserTableName=T_100_Segment__All_Carriers&
645
+ DBShortName=Air_Carriers&
646
+ RawDataTable=T_T100_SEGMENT_ALL_CARRIER&
647
+ sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D__MONTH_NUMBER__+AND+YEAR%3D__YEAR__&
648
+ varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&
649
+ grouplist=&
650
+ suml=&
651
+ sumRegion=&
652
+ filter1=title%3D&
653
+ filter2=title%3D&
654
+ geo=All%A0&
655
+ time=__MONTH_NAME__&
656
+ timename=Month&
657
+ GEOGRAPHY=All&
658
+ XYEAR=__YEAR__&
659
+ FREQUENCY=__MONTH_NUMBER__&
660
+ AllVars=All&
661
+ VarName=DEPARTURES_SCHEDULED&
662
+ VarDesc=DepScheduled&
663
+ VarType=Num&
664
+ VarName=DEPARTURES_PERFORMED&
665
+ VarDesc=DepPerformed&
666
+ VarType=Num&
667
+ VarName=PAYLOAD&
668
+ VarDesc=Payload&
669
+ VarType=Num&
670
+ VarName=SEATS&
671
+ VarDesc=Seats&
672
+ VarType=Num&
673
+ VarName=PASSENGERS&
674
+ VarDesc=Passengers&
675
+ VarType=Num&
676
+ VarName=FREIGHT&
677
+ VarDesc=Freight&
678
+ VarType=Num&
679
+ VarName=MAIL&
680
+ VarDesc=Mail&
681
+ VarType=Num&
682
+ VarName=DISTANCE&
683
+ VarDesc=Distance&
684
+ VarType=Num&
685
+ VarName=RAMP_TO_RAMP&
686
+ VarDesc=RampToRamp&
687
+ VarType=Num&
688
+ VarName=AIR_TIME&
689
+ VarDesc=AirTime&
690
+ VarType=Num&
691
+ VarName=UNIQUE_CARRIER&
692
+ VarDesc=UniqueCarrier&
693
+ VarType=Char&
694
+ VarName=AIRLINE_ID&
695
+ VarDesc=AirlineID&
696
+ VarType=Num&
697
+ VarName=UNIQUE_CARRIER_NAME&
698
+ VarDesc=UniqueCarrierName&
699
+ VarType=Char&
700
+ VarName=UNIQUE_CARRIER_ENTITY&
701
+ VarDesc=UniqCarrierEntity&
702
+ VarType=Char&
703
+ VarName=REGION&
704
+ VarDesc=CarrierRegion&
705
+ VarType=Char&
706
+ VarName=CARRIER&
707
+ VarDesc=Carrier&
708
+ VarType=Char&
709
+ VarName=CARRIER_NAME&
710
+ VarDesc=CarrierName&
711
+ VarType=Char&
712
+ VarName=CARRIER_GROUP&
713
+ VarDesc=CarrierGroup&
714
+ VarType=Num&
715
+ VarName=CARRIER_GROUP_NEW&
716
+ VarDesc=CarrierGroupNew&
717
+ VarType=Num&
718
+ VarName=ORIGIN&
719
+ VarDesc=Origin&
720
+ VarType=Char&
721
+ VarName=ORIGIN_CITY_NAME&
722
+ VarDesc=OriginCityName&
723
+ VarType=Char&
724
+ VarName=ORIGIN_CITY_NUM&
725
+ VarDesc=OriginCityNum&
726
+ VarType=Num&
727
+ VarName=ORIGIN_STATE_ABR&
728
+ VarDesc=OriginState&
729
+ VarType=Char&
730
+ VarName=ORIGIN_STATE_FIPS&
731
+ VarDesc=OriginStateFips&
732
+ VarType=Char&
733
+ VarName=ORIGIN_STATE_NM&
734
+ VarDesc=OriginStateName&
735
+ VarType=Char&
736
+ VarName=ORIGIN_COUNTRY&
737
+ VarDesc=OriginCountry&
738
+ VarType=Char&
739
+ VarName=ORIGIN_COUNTRY_NAME&
740
+ VarDesc=OriginCountryName&
741
+ VarType=Char&
742
+ VarName=ORIGIN_WAC&
743
+ VarDesc=OriginWac&
744
+ VarType=Num&
745
+ VarName=DEST&
746
+ VarDesc=Dest&
747
+ VarType=Char&
748
+ VarName=DEST_CITY_NAME&
749
+ VarDesc=DestCityName&
750
+ VarType=Char&
751
+ VarName=DEST_CITY_NUM&
752
+ VarDesc=DestCityNum&
753
+ VarType=Num&
754
+ VarName=DEST_STATE_ABR&
755
+ VarDesc=DestState&
756
+ VarType=Char&
757
+ VarName=DEST_STATE_FIPS&
758
+ VarDesc=DestStateFips&
759
+ VarType=Char&
760
+ VarName=DEST_STATE_NM&
761
+ VarDesc=DestStateName&
762
+ VarType=Char&
763
+ VarName=DEST_COUNTRY&
764
+ VarDesc=DestCountry&
765
+ VarType=Char&
766
+ VarName=DEST_COUNTRY_NAME&
767
+ VarDesc=DestCountryName&
768
+ VarType=Char&
769
+ VarName=DEST_WAC&
770
+ VarDesc=DestWac&
771
+ VarType=Num&
772
+ VarName=AIRCRAFT_GROUP&
773
+ VarDesc=AircraftGroup&
774
+ VarType=Num&
775
+ VarName=AIRCRAFT_TYPE&
776
+ VarDesc=AircraftType&
777
+ VarType=Char&
778
+ VarName=AIRCRAFT_CONFIG&
779
+ VarDesc=AircraftConfig&
780
+ VarType=Num&
781
+ VarName=YEAR&
782
+ VarDesc=Year&
783
+ VarType=Num&
784
+ VarName=QUARTER&
785
+ VarDesc=Quarter&
786
+ VarType=Num&
787
+ VarName=MONTH&
788
+ VarDesc=Month&
789
+ VarType=Num&
790
+ VarName=DISTANCE_GROUP&
791
+ VarDesc=DistanceGroup&
792
+ VarType=Num&
793
+ VarName=CLASS&
794
+ VarDesc=Class&
795
+ VarType=Char&
796
+ VarName=DATA_SOURCE&
797
+ VarDesc=DataSource&
798
+ VarType=Char
799
+ }.gsub /[\s]+/,''
800
+
801
+ data_miner do
802
+ months = Hash.new
803
+ # (2008..2009).each do |year|
804
+ (2008..2008).each do |year|
805
+ # (1..12).each do |month|
806
+ (1..1).each do |month|
807
+ time = Time.gm year, month
808
+ form_data = FORM_DATA.dup
809
+ form_data.gsub! '__YEAR__', time.year.to_s
810
+ form_data.gsub! '__MONTH_NUMBER__', time.month.to_s
811
+ form_data.gsub! '__MONTH_NAME__', time.strftime('%B')
812
+ months[time] = form_data
813
+ end
814
+ end
815
+ months.each do |month, form_data|
816
+ import "T100 data from #{month.strftime('%B %Y')}",
817
+ :url => URL,
818
+ :form_data => form_data,
819
+ :compression => :zip,
820
+ :glob => '/*.csv' do
821
+ key 'row_hash'
822
+ store 'departures_scheduled', :field_name => 'DEPARTURES_SCHEDULED'
823
+ store 'departures_performed', :field_name => 'DEPARTURES_PERFORMED'
824
+ store 'payload', :field_name => 'PAYLOAD', :from_units => :pounds, :to_units => :kilograms
825
+ store 'seats', :field_name => 'SEATS'
826
+ store 'passengers', :field_name => 'PASSENGERS'
827
+ store 'freight', :field_name => 'FREIGHT', :from_units => :pounds, :to_units => :kilograms
828
+ store 'mail', :field_name => 'MAIL', :from_units => :pounds, :to_units => :kilograms
829
+ store 'distance', :field_name => 'DISTANCE', :from_units => :miles, :to_units => :kilometres
830
+ store 'ramp_to_ramp', :field_name => 'RAMP_TO_RAMP'
831
+ store 'air_time', :field_name => 'AIR_TIME'
832
+ store 'unique_carrier', :field_name => 'UNIQUE_CARRIER'
833
+ store 'dot_airline_id', :field_name => 'AIRLINE_ID'
834
+ store 'unique_carrier_name', :field_name => 'UNIQUE_CARRIER_NAME'
835
+ store 'unique_carrier_entity', :field_name => 'UNIQUE_CARRIER_ENTITY'
836
+ store 'region', :field_name => 'REGION'
837
+ store 'carrier', :field_name => 'CARRIER'
838
+ store 'carrier_name', :field_name => 'CARRIER_NAME'
839
+ store 'carrier_group', :field_name => 'CARRIER_GROUP'
840
+ store 'carrier_group_new', :field_name => 'CARRIER_GROUP_NEW'
841
+ store 'origin_airport_iata', :field_name => 'ORIGIN'
842
+ store 'origin_city_name', :field_name => 'ORIGIN_CITY_NAME'
843
+ store 'origin_city_num', :field_name => 'ORIGIN_CITY_NUM'
844
+ store 'origin_state_abr', :field_name => 'ORIGIN_STATE_ABR'
845
+ store 'origin_state_fips', :field_name => 'ORIGIN_STATE_FIPS'
846
+ store 'origin_state_nm', :field_name => 'ORIGIN_STATE_NM'
847
+ store 'origin_country_iso_3166', :field_name => 'ORIGIN_COUNTRY'
848
+ store 'origin_country_name', :field_name => 'ORIGIN_COUNTRY_NAME'
849
+ store 'origin_wac', :field_name => 'ORIGIN_WAC'
850
+ store 'dest_airport_iata', :field_name => 'DEST'
851
+ store 'dest_city_name', :field_name => 'DEST_CITY_NAME'
852
+ store 'dest_city_num', :field_name => 'DEST_CITY_NUM'
853
+ store 'dest_state_abr', :field_name => 'DEST_STATE_ABR'
854
+ store 'dest_state_fips', :field_name => 'DEST_STATE_FIPS'
855
+ store 'dest_state_nm', :field_name => 'DEST_STATE_NM'
856
+ store 'dest_country_iso_3166', :field_name => 'DEST_COUNTRY'
857
+ store 'dest_country_name', :field_name => 'DEST_COUNTRY_NAME'
858
+ store 'dest_wac', :field_name => 'DEST_WAC'
859
+ store 'bts_aircraft_group', :field_name => 'AIRCRAFT_GROUP'
860
+ store 'bts_aircraft_type', :field_name => 'AIRCRAFT_TYPE'
861
+ store 'bts_aircraft_config', :field_name => 'AIRCRAFT_CONFIG'
862
+ store 'year', :field_name => 'YEAR'
863
+ store 'quarter', :field_name => 'QUARTER'
864
+ store 'month', :field_name => 'MONTH'
865
+ store 'bts_distance_group', :field_name => 'DISTANCE_GROUP'
866
+ store 'bts_service_class', :field_name => 'CLASS'
867
+ store 'data_source', :field_name => 'DATA_SOURCE'
868
+ end
869
+ end
870
+
871
+ process 'Derive freight share as a fraction of payload' do
872
+ update_all 'freight_share = (freight + mail) / payload', 'payload > 0'
873
+ end
874
+
875
+ process 'Derive load factor, which is passengers divided by the total seats available' do
876
+ update_all 'load_factor = passengers / seats', 'passengers <= seats'
877
+ end
878
+
879
+ process 'Derive average seats per departure' do
880
+ update_all 'seats_per_departure = seats / departures_performed', 'departures_performed > 0'
881
+ end
882
+ end
883
+ end
884
+
639
885
  # todo: have somebody properly organize these
640
886
  class DataMinerTest < Test::Unit::TestCase
641
887
  if ENV['FAST'] == 'true'
@@ -733,9 +979,14 @@ class DataMinerTest < Test::Unit::TestCase
733
979
  assert AutomobileVariant.count('make_name LIKE "%tesla"') > 0
734
980
  end
735
981
 
736
- # should "mine residence survey day" do
737
- # ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
738
- # assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
739
- # end
982
+ should "mine T100 flight segments" do
983
+ T100FlightSegment.run_data_miner!
984
+ assert T100FlightSegment.count('dest_country_name LIKE "%United States"') > 0
985
+ end
986
+
987
+ should "mine residence survey day" do
988
+ ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
989
+ assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
990
+ end
740
991
  end
741
992
  end
data/test/test_helper.rb CHANGED
@@ -18,6 +18,71 @@ class Test::Unit::TestCase
18
18
  end
19
19
 
20
20
  ActiveRecord::Schema.define(:version => 20090819143429) do
21
+ create_table "t100_flight_segments", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
22
+ t.integer "departures_performed"
23
+ t.integer "payload"
24
+ t.integer "seats"
25
+ t.integer "passengers"
26
+ t.integer "freight"
27
+ t.integer "mail"
28
+ t.integer "ramp_to_ramp"
29
+ t.integer "air_time"
30
+ t.float "load_factor"
31
+ t.float "freight_share"
32
+ t.integer "distance"
33
+ t.integer "departures_scheduled"
34
+ t.string "unique_carrier"
35
+ t.integer "dot_airline_id"
36
+ t.string "unique_carrier_name"
37
+ t.string "unique_carrier_entity"
38
+ t.string "region"
39
+ t.string "carrier"
40
+ t.string "carrier_name"
41
+ t.integer "carrier_group"
42
+ t.integer "carrier_group_new"
43
+ t.string "origin_airport_iata"
44
+ t.string "origin_city_name"
45
+ t.integer "origin_city_num"
46
+ t.string "origin_state_abr"
47
+ t.string "origin_state_fips"
48
+ t.string "origin_state_nm"
49
+ t.string "origin_country_iso_3166"
50
+ t.string "origin_country_name"
51
+ t.integer "origin_wac"
52
+ t.string "dest_airport_iata"
53
+ t.string "dest_city_name"
54
+ t.integer "dest_city_num"
55
+ t.string "dest_state_abr"
56
+ t.string "dest_state_fips"
57
+ t.string "dest_state_nm"
58
+ t.string "dest_country_iso_3166"
59
+ t.string "dest_country_name"
60
+ t.integer "dest_wac"
61
+ t.integer "bts_aircraft_group"
62
+ t.integer "bts_aircraft_type"
63
+ t.integer "bts_aircraft_config"
64
+ t.integer "year"
65
+ t.integer "quarter"
66
+ t.integer "month"
67
+ t.integer "bts_distance_group"
68
+ t.string "bts_service_class"
69
+ t.string "data_source"
70
+ t.float "seats_per_departure"
71
+
72
+ t.string 'payload_units'
73
+ t.string 'freight_units'
74
+ t.string 'mail_units'
75
+ t.string 'distance_units'
76
+
77
+ t.datetime "created_at"
78
+ t.datetime "updated_at"
79
+
80
+ t.string "row_hash"
81
+ t.integer 'data_miner_touch_count'
82
+ t.integer 'data_miner_last_run_id'
83
+ end
84
+ execute 'ALTER TABLE t100_flight_segments ADD PRIMARY KEY (row_hash);'
85
+
21
86
  create_table 'airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
22
87
  t.string 'iata_code'
23
88
  t.string 'name'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.4.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2010-03-24 00:00:00 -04:00
13
+ date: 2010-03-26 00:00:00 -04:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -21,7 +21,7 @@ dependencies:
21
21
  requirements:
22
22
  - - ~>
23
23
  - !ruby/object:Gem::Version
24
- version: 0.2.5
24
+ version: 0.2.6
25
25
  version:
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: activerecord