data_miner 0.4.4 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -10,7 +10,7 @@ begin
10
10
  gem.email = "seamus@abshere.net"
11
11
  gem.homepage = "http://github.com/seamusabshere/data_miner"
12
12
  gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
13
- gem.add_dependency 'remote_table', '~>0.2.5'
13
+ gem.add_dependency 'remote_table', '~>0.2.6'
14
14
  gem.add_dependency 'activerecord', '~>2.3.4'
15
15
  gem.add_dependency 'activesupport', '~>2.3.4'
16
16
  gem.add_dependency 'andand', '~>1.3.1'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.4
1
+ 0.4.5
data/data_miner.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.4"
8
+ s.version = "0.4.5"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2010-03-24}
12
+ s.date = %q{2010-03-26}
13
13
  s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -51,7 +51,7 @@ Gem::Specification.new do |s|
51
51
  s.specification_version = 3
52
52
 
53
53
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
54
- s.add_runtime_dependency(%q<remote_table>, ["~> 0.2.5"])
54
+ s.add_runtime_dependency(%q<remote_table>, ["~> 0.2.6"])
55
55
  s.add_runtime_dependency(%q<activerecord>, ["~> 2.3.4"])
56
56
  s.add_runtime_dependency(%q<activesupport>, ["~> 2.3.4"])
57
57
  s.add_runtime_dependency(%q<andand>, ["~> 1.3.1"])
@@ -60,7 +60,7 @@ Gem::Specification.new do |s|
60
60
  s.add_runtime_dependency(%q<blockenspiel>, ["~> 0.3.2"])
61
61
  s.add_runtime_dependency(%q<log4r>, ["~> 1.1.7"])
62
62
  else
63
- s.add_dependency(%q<remote_table>, ["~> 0.2.5"])
63
+ s.add_dependency(%q<remote_table>, ["~> 0.2.6"])
64
64
  s.add_dependency(%q<activerecord>, ["~> 2.3.4"])
65
65
  s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
66
66
  s.add_dependency(%q<andand>, ["~> 1.3.1"])
@@ -70,7 +70,7 @@ Gem::Specification.new do |s|
70
70
  s.add_dependency(%q<log4r>, ["~> 1.1.7"])
71
71
  end
72
72
  else
73
- s.add_dependency(%q<remote_table>, ["~> 0.2.5"])
73
+ s.add_dependency(%q<remote_table>, ["~> 0.2.6"])
74
74
  s.add_dependency(%q<activerecord>, ["~> 2.3.4"])
75
75
  s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
76
76
  s.add_dependency(%q<andand>, ["~> 1.3.1"])
@@ -83,7 +83,7 @@ module DataMiner
83
83
  end
84
84
 
85
85
  def do_convert(row, value)
86
- logger.error "[data_miner gem] If you use :from_units, you need to set :to_units (#{resource.name}##{name})" unless wants_units?
86
+ DataMiner.log_or_raise "If you use :from_units, you need to set :to_units (#{resource.name}##{name})" unless wants_units?
87
87
  value.to_f.convert((from_units || unit_from_source(row)), to_units)
88
88
  end
89
89
 
@@ -72,11 +72,11 @@ module DataMiner
72
72
  import_runnables.each do |runnable|
73
73
  runnable.attributes.each do |_, attribute|
74
74
  if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
75
- DataMiner.logger.error %{
75
+ DataMiner.log_or_raise %{
76
76
 
77
77
  ================================
78
78
 
79
- [data_miner gem] You don't have a valid unit definition for #{resource.name}##{attribute.name}.
79
+ You don't have a valid unit definition for #{resource.name}##{attribute.name}.
80
80
 
81
81
  You supplied #{attribute.options.keys.select { |k, _| k.to_s =~ /unit/ }.map(&:to_sym).inspect }.
82
82
 
@@ -93,7 +93,7 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
93
93
  missing_columns = Array.new
94
94
  import_runnables.each do |runnable|
95
95
  runnable.attributes.each do |_, attribute|
96
- DataMiner.logger.error "[data_miner gem] You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
96
+ DataMiner.log_or_raise "You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
97
97
  unless resource.column_names.include? attribute.name
98
98
  missing_columns << attribute.name
99
99
  end
@@ -104,11 +104,11 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
104
104
  end
105
105
  missing_columns.uniq!
106
106
  if missing_columns.any?
107
- DataMiner.logger.error %{
107
+ DataMiner.log_or_raise %{
108
108
 
109
109
  ================================
110
110
 
111
- [data_miner gem] On #{resource}, it looks like you're missing some columns...
111
+ On #{resource}, it looks like you're missing some columns...
112
112
 
113
113
  Please run this...
114
114
 
@@ -16,7 +16,7 @@ module DataMiner
16
16
  @position_in_run = position_in_run
17
17
  @description = description
18
18
  @errata = Errata.new(:url => options[:errata], :klass => resource) if options[:errata]
19
- @table = RemoteTable.new(options.slice(:url, :filename, :post_data, :format, :skip, :cut, :schema, :schema_name, :trap, :select, :reject, :sheet, :delimiter, :headers, :transform, :crop, :encoding))
19
+ @table = RemoteTable.new(options.slice(:url, :filename, :form_data, :format, :skip, :cut, :schema, :schema_name, :trap, :select, :reject, :sheet, :delimiter, :headers, :transform, :crop, :encoding, :compression, :glob))
20
20
  end
21
21
 
22
22
  def inspect
@@ -28,12 +28,12 @@ module DataMiner
28
28
  end
29
29
 
30
30
  def store(attr_name, attr_options = {})
31
- DataMiner.logger.error "[data_miner gem] You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
31
+ DataMiner.log_or_raise "You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
32
32
  attributes[attr_name] = Attribute.new self, attr_name, attr_options
33
33
  end
34
34
 
35
35
  def key(attr_name, attr_options = {})
36
- DataMiner.logger.error "[data_miner gem] You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
36
+ DataMiner.log_or_raise "You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
37
37
  @key = attr_name
38
38
  store attr_name, attr_options
39
39
  end
data/lib/data_miner.rb CHANGED
@@ -32,6 +32,17 @@ module DataMiner
32
32
 
33
33
  self.logger = Logger.new 'data_miner'
34
34
  logger.add info_outputter, error_outputter
35
+ ActiveRecord::Base.logger = logger
36
+ ActiveRecord::Base.colorize_logging = false
37
+ end
38
+ end
39
+
40
+ def self.log_or_raise(message)
41
+ message = "[data_miner gem] #{message}"
42
+ if ENV['RAILS_ENV'] == 'production'
43
+ logger.error message
44
+ else
45
+ raise message
35
46
  end
36
47
  end
37
48
 
@@ -51,7 +62,7 @@ end
51
62
  ActiveRecord::Base.class_eval do
52
63
  def self.data_miner(&block)
53
64
  unless table_exists?
54
- logger.error "[DataMiner gem] Database table `#{table_name}` doesn't exist. DataMiner probably won't work properly until you run a migration or otherwise fix the schema."
65
+ DataMiner.log_or_raise "Database table `#{table_name}` doesn't exist. DataMiner probably won't work properly until you run a migration or otherwise fix the schema."
55
66
  return
56
67
  end
57
68
 
@@ -481,7 +481,7 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
481
481
 
482
482
  # conversions are NOT performed here, since we first have to zero out legitimate skips
483
483
  # otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
484
- import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv', :headers => :upcase do
484
+ import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv' do
485
485
  key 'department_of_energy_identifier', :field_name => 'DOEID'
486
486
 
487
487
  store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
@@ -636,6 +636,252 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
636
636
  end
637
637
  end
638
638
 
639
+ # T-100 Segment (All Carriers): http://www.transtats.bts.gov/Fields.asp?Table_ID=293
640
+ class T100FlightSegment < ActiveRecord::Base
641
+ set_primary_key :row_hash
642
+ URL = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
643
+ FORM_DATA = %{
644
+ UserTableName=T_100_Segment__All_Carriers&
645
+ DBShortName=Air_Carriers&
646
+ RawDataTable=T_T100_SEGMENT_ALL_CARRIER&
647
+ sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D__MONTH_NUMBER__+AND+YEAR%3D__YEAR__&
648
+ varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&
649
+ grouplist=&
650
+ suml=&
651
+ sumRegion=&
652
+ filter1=title%3D&
653
+ filter2=title%3D&
654
+ geo=All%A0&
655
+ time=__MONTH_NAME__&
656
+ timename=Month&
657
+ GEOGRAPHY=All&
658
+ XYEAR=__YEAR__&
659
+ FREQUENCY=__MONTH_NUMBER__&
660
+ AllVars=All&
661
+ VarName=DEPARTURES_SCHEDULED&
662
+ VarDesc=DepScheduled&
663
+ VarType=Num&
664
+ VarName=DEPARTURES_PERFORMED&
665
+ VarDesc=DepPerformed&
666
+ VarType=Num&
667
+ VarName=PAYLOAD&
668
+ VarDesc=Payload&
669
+ VarType=Num&
670
+ VarName=SEATS&
671
+ VarDesc=Seats&
672
+ VarType=Num&
673
+ VarName=PASSENGERS&
674
+ VarDesc=Passengers&
675
+ VarType=Num&
676
+ VarName=FREIGHT&
677
+ VarDesc=Freight&
678
+ VarType=Num&
679
+ VarName=MAIL&
680
+ VarDesc=Mail&
681
+ VarType=Num&
682
+ VarName=DISTANCE&
683
+ VarDesc=Distance&
684
+ VarType=Num&
685
+ VarName=RAMP_TO_RAMP&
686
+ VarDesc=RampToRamp&
687
+ VarType=Num&
688
+ VarName=AIR_TIME&
689
+ VarDesc=AirTime&
690
+ VarType=Num&
691
+ VarName=UNIQUE_CARRIER&
692
+ VarDesc=UniqueCarrier&
693
+ VarType=Char&
694
+ VarName=AIRLINE_ID&
695
+ VarDesc=AirlineID&
696
+ VarType=Num&
697
+ VarName=UNIQUE_CARRIER_NAME&
698
+ VarDesc=UniqueCarrierName&
699
+ VarType=Char&
700
+ VarName=UNIQUE_CARRIER_ENTITY&
701
+ VarDesc=UniqCarrierEntity&
702
+ VarType=Char&
703
+ VarName=REGION&
704
+ VarDesc=CarrierRegion&
705
+ VarType=Char&
706
+ VarName=CARRIER&
707
+ VarDesc=Carrier&
708
+ VarType=Char&
709
+ VarName=CARRIER_NAME&
710
+ VarDesc=CarrierName&
711
+ VarType=Char&
712
+ VarName=CARRIER_GROUP&
713
+ VarDesc=CarrierGroup&
714
+ VarType=Num&
715
+ VarName=CARRIER_GROUP_NEW&
716
+ VarDesc=CarrierGroupNew&
717
+ VarType=Num&
718
+ VarName=ORIGIN&
719
+ VarDesc=Origin&
720
+ VarType=Char&
721
+ VarName=ORIGIN_CITY_NAME&
722
+ VarDesc=OriginCityName&
723
+ VarType=Char&
724
+ VarName=ORIGIN_CITY_NUM&
725
+ VarDesc=OriginCityNum&
726
+ VarType=Num&
727
+ VarName=ORIGIN_STATE_ABR&
728
+ VarDesc=OriginState&
729
+ VarType=Char&
730
+ VarName=ORIGIN_STATE_FIPS&
731
+ VarDesc=OriginStateFips&
732
+ VarType=Char&
733
+ VarName=ORIGIN_STATE_NM&
734
+ VarDesc=OriginStateName&
735
+ VarType=Char&
736
+ VarName=ORIGIN_COUNTRY&
737
+ VarDesc=OriginCountry&
738
+ VarType=Char&
739
+ VarName=ORIGIN_COUNTRY_NAME&
740
+ VarDesc=OriginCountryName&
741
+ VarType=Char&
742
+ VarName=ORIGIN_WAC&
743
+ VarDesc=OriginWac&
744
+ VarType=Num&
745
+ VarName=DEST&
746
+ VarDesc=Dest&
747
+ VarType=Char&
748
+ VarName=DEST_CITY_NAME&
749
+ VarDesc=DestCityName&
750
+ VarType=Char&
751
+ VarName=DEST_CITY_NUM&
752
+ VarDesc=DestCityNum&
753
+ VarType=Num&
754
+ VarName=DEST_STATE_ABR&
755
+ VarDesc=DestState&
756
+ VarType=Char&
757
+ VarName=DEST_STATE_FIPS&
758
+ VarDesc=DestStateFips&
759
+ VarType=Char&
760
+ VarName=DEST_STATE_NM&
761
+ VarDesc=DestStateName&
762
+ VarType=Char&
763
+ VarName=DEST_COUNTRY&
764
+ VarDesc=DestCountry&
765
+ VarType=Char&
766
+ VarName=DEST_COUNTRY_NAME&
767
+ VarDesc=DestCountryName&
768
+ VarType=Char&
769
+ VarName=DEST_WAC&
770
+ VarDesc=DestWac&
771
+ VarType=Num&
772
+ VarName=AIRCRAFT_GROUP&
773
+ VarDesc=AircraftGroup&
774
+ VarType=Num&
775
+ VarName=AIRCRAFT_TYPE&
776
+ VarDesc=AircraftType&
777
+ VarType=Char&
778
+ VarName=AIRCRAFT_CONFIG&
779
+ VarDesc=AircraftConfig&
780
+ VarType=Num&
781
+ VarName=YEAR&
782
+ VarDesc=Year&
783
+ VarType=Num&
784
+ VarName=QUARTER&
785
+ VarDesc=Quarter&
786
+ VarType=Num&
787
+ VarName=MONTH&
788
+ VarDesc=Month&
789
+ VarType=Num&
790
+ VarName=DISTANCE_GROUP&
791
+ VarDesc=DistanceGroup&
792
+ VarType=Num&
793
+ VarName=CLASS&
794
+ VarDesc=Class&
795
+ VarType=Char&
796
+ VarName=DATA_SOURCE&
797
+ VarDesc=DataSource&
798
+ VarType=Char
799
+ }.gsub /[\s]+/,''
800
+
801
+ data_miner do
802
+ months = Hash.new
803
+ # (2008..2009).each do |year|
804
+ (2008..2008).each do |year|
805
+ # (1..12).each do |month|
806
+ (1..1).each do |month|
807
+ time = Time.gm year, month
808
+ form_data = FORM_DATA.dup
809
+ form_data.gsub! '__YEAR__', time.year.to_s
810
+ form_data.gsub! '__MONTH_NUMBER__', time.month.to_s
811
+ form_data.gsub! '__MONTH_NAME__', time.strftime('%B')
812
+ months[time] = form_data
813
+ end
814
+ end
815
+ months.each do |month, form_data|
816
+ import "T100 data from #{month.strftime('%B %Y')}",
817
+ :url => URL,
818
+ :form_data => form_data,
819
+ :compression => :zip,
820
+ :glob => '/*.csv' do
821
+ key 'row_hash'
822
+ store 'departures_scheduled', :field_name => 'DEPARTURES_SCHEDULED'
823
+ store 'departures_performed', :field_name => 'DEPARTURES_PERFORMED'
824
+ store 'payload', :field_name => 'PAYLOAD', :from_units => :pounds, :to_units => :kilograms
825
+ store 'seats', :field_name => 'SEATS'
826
+ store 'passengers', :field_name => 'PASSENGERS'
827
+ store 'freight', :field_name => 'FREIGHT', :from_units => :pounds, :to_units => :kilograms
828
+ store 'mail', :field_name => 'MAIL', :from_units => :pounds, :to_units => :kilograms
829
+ store 'distance', :field_name => 'DISTANCE', :from_units => :miles, :to_units => :kilometres
830
+ store 'ramp_to_ramp', :field_name => 'RAMP_TO_RAMP'
831
+ store 'air_time', :field_name => 'AIR_TIME'
832
+ store 'unique_carrier', :field_name => 'UNIQUE_CARRIER'
833
+ store 'dot_airline_id', :field_name => 'AIRLINE_ID'
834
+ store 'unique_carrier_name', :field_name => 'UNIQUE_CARRIER_NAME'
835
+ store 'unique_carrier_entity', :field_name => 'UNIQUE_CARRIER_ENTITY'
836
+ store 'region', :field_name => 'REGION'
837
+ store 'carrier', :field_name => 'CARRIER'
838
+ store 'carrier_name', :field_name => 'CARRIER_NAME'
839
+ store 'carrier_group', :field_name => 'CARRIER_GROUP'
840
+ store 'carrier_group_new', :field_name => 'CARRIER_GROUP_NEW'
841
+ store 'origin_airport_iata', :field_name => 'ORIGIN'
842
+ store 'origin_city_name', :field_name => 'ORIGIN_CITY_NAME'
843
+ store 'origin_city_num', :field_name => 'ORIGIN_CITY_NUM'
844
+ store 'origin_state_abr', :field_name => 'ORIGIN_STATE_ABR'
845
+ store 'origin_state_fips', :field_name => 'ORIGIN_STATE_FIPS'
846
+ store 'origin_state_nm', :field_name => 'ORIGIN_STATE_NM'
847
+ store 'origin_country_iso_3166', :field_name => 'ORIGIN_COUNTRY'
848
+ store 'origin_country_name', :field_name => 'ORIGIN_COUNTRY_NAME'
849
+ store 'origin_wac', :field_name => 'ORIGIN_WAC'
850
+ store 'dest_airport_iata', :field_name => 'DEST'
851
+ store 'dest_city_name', :field_name => 'DEST_CITY_NAME'
852
+ store 'dest_city_num', :field_name => 'DEST_CITY_NUM'
853
+ store 'dest_state_abr', :field_name => 'DEST_STATE_ABR'
854
+ store 'dest_state_fips', :field_name => 'DEST_STATE_FIPS'
855
+ store 'dest_state_nm', :field_name => 'DEST_STATE_NM'
856
+ store 'dest_country_iso_3166', :field_name => 'DEST_COUNTRY'
857
+ store 'dest_country_name', :field_name => 'DEST_COUNTRY_NAME'
858
+ store 'dest_wac', :field_name => 'DEST_WAC'
859
+ store 'bts_aircraft_group', :field_name => 'AIRCRAFT_GROUP'
860
+ store 'bts_aircraft_type', :field_name => 'AIRCRAFT_TYPE'
861
+ store 'bts_aircraft_config', :field_name => 'AIRCRAFT_CONFIG'
862
+ store 'year', :field_name => 'YEAR'
863
+ store 'quarter', :field_name => 'QUARTER'
864
+ store 'month', :field_name => 'MONTH'
865
+ store 'bts_distance_group', :field_name => 'DISTANCE_GROUP'
866
+ store 'bts_service_class', :field_name => 'CLASS'
867
+ store 'data_source', :field_name => 'DATA_SOURCE'
868
+ end
869
+ end
870
+
871
+ process 'Derive freight share as a fraction of payload' do
872
+ update_all 'freight_share = (freight + mail) / payload', 'payload > 0'
873
+ end
874
+
875
+ process 'Derive load factor, which is passengers divided by the total seats available' do
876
+ update_all 'load_factor = passengers / seats', 'passengers <= seats'
877
+ end
878
+
879
+ process 'Derive average seats per departure' do
880
+ update_all 'seats_per_departure = seats / departures_performed', 'departures_performed > 0'
881
+ end
882
+ end
883
+ end
884
+
639
885
  # todo: have somebody properly organize these
640
886
  class DataMinerTest < Test::Unit::TestCase
641
887
  if ENV['FAST'] == 'true'
@@ -733,9 +979,14 @@ class DataMinerTest < Test::Unit::TestCase
733
979
  assert AutomobileVariant.count('make_name LIKE "%tesla"') > 0
734
980
  end
735
981
 
736
- # should "mine residence survey day" do
737
- # ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
738
- # assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
739
- # end
982
+ should "mine T100 flight segments" do
983
+ T100FlightSegment.run_data_miner!
984
+ assert T100FlightSegment.count('dest_country_name LIKE "%United States"') > 0
985
+ end
986
+
987
+ should "mine residence survey day" do
988
+ ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
989
+ assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
990
+ end
740
991
  end
741
992
  end
data/test/test_helper.rb CHANGED
@@ -18,6 +18,71 @@ class Test::Unit::TestCase
18
18
  end
19
19
 
20
20
  ActiveRecord::Schema.define(:version => 20090819143429) do
21
+ create_table "t100_flight_segments", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
22
+ t.integer "departures_performed"
23
+ t.integer "payload"
24
+ t.integer "seats"
25
+ t.integer "passengers"
26
+ t.integer "freight"
27
+ t.integer "mail"
28
+ t.integer "ramp_to_ramp"
29
+ t.integer "air_time"
30
+ t.float "load_factor"
31
+ t.float "freight_share"
32
+ t.integer "distance"
33
+ t.integer "departures_scheduled"
34
+ t.string "unique_carrier"
35
+ t.integer "dot_airline_id"
36
+ t.string "unique_carrier_name"
37
+ t.string "unique_carrier_entity"
38
+ t.string "region"
39
+ t.string "carrier"
40
+ t.string "carrier_name"
41
+ t.integer "carrier_group"
42
+ t.integer "carrier_group_new"
43
+ t.string "origin_airport_iata"
44
+ t.string "origin_city_name"
45
+ t.integer "origin_city_num"
46
+ t.string "origin_state_abr"
47
+ t.string "origin_state_fips"
48
+ t.string "origin_state_nm"
49
+ t.string "origin_country_iso_3166"
50
+ t.string "origin_country_name"
51
+ t.integer "origin_wac"
52
+ t.string "dest_airport_iata"
53
+ t.string "dest_city_name"
54
+ t.integer "dest_city_num"
55
+ t.string "dest_state_abr"
56
+ t.string "dest_state_fips"
57
+ t.string "dest_state_nm"
58
+ t.string "dest_country_iso_3166"
59
+ t.string "dest_country_name"
60
+ t.integer "dest_wac"
61
+ t.integer "bts_aircraft_group"
62
+ t.integer "bts_aircraft_type"
63
+ t.integer "bts_aircraft_config"
64
+ t.integer "year"
65
+ t.integer "quarter"
66
+ t.integer "month"
67
+ t.integer "bts_distance_group"
68
+ t.string "bts_service_class"
69
+ t.string "data_source"
70
+ t.float "seats_per_departure"
71
+
72
+ t.string 'payload_units'
73
+ t.string 'freight_units'
74
+ t.string 'mail_units'
75
+ t.string 'distance_units'
76
+
77
+ t.datetime "created_at"
78
+ t.datetime "updated_at"
79
+
80
+ t.string "row_hash"
81
+ t.integer 'data_miner_touch_count'
82
+ t.integer 'data_miner_last_run_id'
83
+ end
84
+ execute 'ALTER TABLE t100_flight_segments ADD PRIMARY KEY (row_hash);'
85
+
21
86
  create_table 'airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
22
87
  t.string 'iata_code'
23
88
  t.string 'name'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.4.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2010-03-24 00:00:00 -04:00
13
+ date: 2010-03-26 00:00:00 -04:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -21,7 +21,7 @@ dependencies:
21
21
  requirements:
22
22
  - - ~>
23
23
  - !ruby/object:Gem::Version
24
- version: 0.2.5
24
+ version: 0.2.6
25
25
  version:
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: activerecord