data_miner 0.4.4 → 0.4.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/data_miner.gemspec +5 -5
- data/lib/data_miner/attribute.rb +1 -1
- data/lib/data_miner/configuration.rb +5 -5
- data/lib/data_miner/import.rb +3 -3
- data/lib/data_miner.rb +12 -1
- data/test/data_miner_test.rb +256 -5
- data/test/test_helper.rb +65 -0
- metadata +3 -3
data/Rakefile
CHANGED
@@ -10,7 +10,7 @@ begin
|
|
10
10
|
gem.email = "seamus@abshere.net"
|
11
11
|
gem.homepage = "http://github.com/seamusabshere/data_miner"
|
12
12
|
gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
13
|
-
gem.add_dependency 'remote_table', '~>0.2.
|
13
|
+
gem.add_dependency 'remote_table', '~>0.2.6'
|
14
14
|
gem.add_dependency 'activerecord', '~>2.3.4'
|
15
15
|
gem.add_dependency 'activesupport', '~>2.3.4'
|
16
16
|
gem.add_dependency 'andand', '~>1.3.1'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.4
|
1
|
+
0.4.5
|
data/data_miner.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{data_miner}
|
8
|
-
s.version = "0.4.4"
|
8
|
+
s.version = "0.4.5"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-03-
|
12
|
+
s.date = %q{2010-03-26}
|
13
13
|
s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -51,7 +51,7 @@ Gem::Specification.new do |s|
|
|
51
51
|
s.specification_version = 3
|
52
52
|
|
53
53
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
54
|
-
s.add_runtime_dependency(%q<remote_table>, ["~> 0.2.
|
54
|
+
s.add_runtime_dependency(%q<remote_table>, ["~> 0.2.6"])
|
55
55
|
s.add_runtime_dependency(%q<activerecord>, ["~> 2.3.4"])
|
56
56
|
s.add_runtime_dependency(%q<activesupport>, ["~> 2.3.4"])
|
57
57
|
s.add_runtime_dependency(%q<andand>, ["~> 1.3.1"])
|
@@ -60,7 +60,7 @@ Gem::Specification.new do |s|
|
|
60
60
|
s.add_runtime_dependency(%q<blockenspiel>, ["~> 0.3.2"])
|
61
61
|
s.add_runtime_dependency(%q<log4r>, ["~> 1.1.7"])
|
62
62
|
else
|
63
|
-
s.add_dependency(%q<remote_table>, ["~> 0.2.
|
63
|
+
s.add_dependency(%q<remote_table>, ["~> 0.2.6"])
|
64
64
|
s.add_dependency(%q<activerecord>, ["~> 2.3.4"])
|
65
65
|
s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
|
66
66
|
s.add_dependency(%q<andand>, ["~> 1.3.1"])
|
@@ -70,7 +70,7 @@ Gem::Specification.new do |s|
|
|
70
70
|
s.add_dependency(%q<log4r>, ["~> 1.1.7"])
|
71
71
|
end
|
72
72
|
else
|
73
|
-
s.add_dependency(%q<remote_table>, ["~> 0.2.
|
73
|
+
s.add_dependency(%q<remote_table>, ["~> 0.2.6"])
|
74
74
|
s.add_dependency(%q<activerecord>, ["~> 2.3.4"])
|
75
75
|
s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
|
76
76
|
s.add_dependency(%q<andand>, ["~> 1.3.1"])
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -83,7 +83,7 @@ module DataMiner
|
|
83
83
|
end
|
84
84
|
|
85
85
|
def do_convert(row, value)
|
86
|
-
|
86
|
+
DataMiner.log_or_raise "If you use :from_units, you need to set :to_units (#{resource.name}##{name})" unless wants_units?
|
87
87
|
value.to_f.convert((from_units || unit_from_source(row)), to_units)
|
88
88
|
end
|
89
89
|
|
data/lib/data_miner/configuration.rb
CHANGED
@@ -72,11 +72,11 @@ module DataMiner
|
|
72
72
|
import_runnables.each do |runnable|
|
73
73
|
runnable.attributes.each do |_, attribute|
|
74
74
|
if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
|
75
|
-
DataMiner.
|
75
|
+
DataMiner.log_or_raise %{
|
76
76
|
|
77
77
|
================================
|
78
78
|
|
79
|
-
|
79
|
+
You don't have a valid unit definition for #{resource.name}##{attribute.name}.
|
80
80
|
|
81
81
|
You supplied #{attribute.options.keys.select { |k, _| k.to_s =~ /unit/ }.map(&:to_sym).inspect }.
|
82
82
|
|
@@ -93,7 +93,7 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
|
|
93
93
|
missing_columns = Array.new
|
94
94
|
import_runnables.each do |runnable|
|
95
95
|
runnable.attributes.each do |_, attribute|
|
96
|
-
DataMiner.
|
96
|
+
DataMiner.log_or_raise "You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
|
97
97
|
unless resource.column_names.include? attribute.name
|
98
98
|
missing_columns << attribute.name
|
99
99
|
end
|
@@ -104,11 +104,11 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
|
|
104
104
|
end
|
105
105
|
missing_columns.uniq!
|
106
106
|
if missing_columns.any?
|
107
|
-
DataMiner.
|
107
|
+
DataMiner.log_or_raise %{
|
108
108
|
|
109
109
|
================================
|
110
110
|
|
111
|
-
|
111
|
+
On #{resource}, it looks like you're missing some columns...
|
112
112
|
|
113
113
|
Please run this...
|
114
114
|
|
data/lib/data_miner/import.rb
CHANGED
@@ -16,7 +16,7 @@ module DataMiner
|
|
16
16
|
@position_in_run = position_in_run
|
17
17
|
@description = description
|
18
18
|
@errata = Errata.new(:url => options[:errata], :klass => resource) if options[:errata]
|
19
|
-
@table = RemoteTable.new(options.slice(:url, :filename, :
|
19
|
+
@table = RemoteTable.new(options.slice(:url, :filename, :form_data, :format, :skip, :cut, :schema, :schema_name, :trap, :select, :reject, :sheet, :delimiter, :headers, :transform, :crop, :encoding, :compression, :glob))
|
20
20
|
end
|
21
21
|
|
22
22
|
def inspect
|
@@ -28,12 +28,12 @@ module DataMiner
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def store(attr_name, attr_options = {})
|
31
|
-
DataMiner.
|
31
|
+
DataMiner.log_or_raise "You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
|
32
32
|
attributes[attr_name] = Attribute.new self, attr_name, attr_options
|
33
33
|
end
|
34
34
|
|
35
35
|
def key(attr_name, attr_options = {})
|
36
|
-
DataMiner.
|
36
|
+
DataMiner.log_or_raise "You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
|
37
37
|
@key = attr_name
|
38
38
|
store attr_name, attr_options
|
39
39
|
end
|
data/lib/data_miner.rb
CHANGED
@@ -32,6 +32,17 @@ module DataMiner
|
|
32
32
|
|
33
33
|
self.logger = Logger.new 'data_miner'
|
34
34
|
logger.add info_outputter, error_outputter
|
35
|
+
ActiveRecord::Base.logger = logger
|
36
|
+
ActiveRecord::Base.colorize_logging = false
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def self.log_or_raise(message)
|
41
|
+
message = "[data_miner gem] #{message}"
|
42
|
+
if ENV['RAILS_ENV'] == 'production'
|
43
|
+
logger.error message
|
44
|
+
else
|
45
|
+
raise message
|
35
46
|
end
|
36
47
|
end
|
37
48
|
|
@@ -51,7 +62,7 @@ end
|
|
51
62
|
ActiveRecord::Base.class_eval do
|
52
63
|
def self.data_miner(&block)
|
53
64
|
unless table_exists?
|
54
|
-
|
65
|
+
DataMiner.log_or_raise "Database table `#{table_name}` doesn't exist. DataMiner probably won't work properly until you run a migration or otherwise fix the schema."
|
55
66
|
return
|
56
67
|
end
|
57
68
|
|
data/test/data_miner_test.rb
CHANGED
@@ -481,7 +481,7 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
|
481
481
|
|
482
482
|
# conversions are NOT performed here, since we first have to zero out legitimate skips
|
483
483
|
# otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
|
484
|
-
import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv'
|
484
|
+
import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv' do
|
485
485
|
key 'department_of_energy_identifier', :field_name => 'DOEID'
|
486
486
|
|
487
487
|
store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
|
@@ -636,6 +636,252 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
|
636
636
|
end
|
637
637
|
end
|
638
638
|
|
639
|
+
# T-100 Segment (All Carriers): http://www.transtats.bts.gov/Fields.asp?Table_ID=293
|
640
|
+
class T100FlightSegment < ActiveRecord::Base
|
641
|
+
set_primary_key :row_hash
|
642
|
+
URL = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
|
643
|
+
FORM_DATA = %{
|
644
|
+
UserTableName=T_100_Segment__All_Carriers&
|
645
|
+
DBShortName=Air_Carriers&
|
646
|
+
RawDataTable=T_T100_SEGMENT_ALL_CARRIER&
|
647
|
+
sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D__MONTH_NUMBER__+AND+YEAR%3D__YEAR__&
|
648
|
+
varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&
|
649
|
+
grouplist=&
|
650
|
+
suml=&
|
651
|
+
sumRegion=&
|
652
|
+
filter1=title%3D&
|
653
|
+
filter2=title%3D&
|
654
|
+
geo=All%A0&
|
655
|
+
time=__MONTH_NAME__&
|
656
|
+
timename=Month&
|
657
|
+
GEOGRAPHY=All&
|
658
|
+
XYEAR=__YEAR__&
|
659
|
+
FREQUENCY=__MONTH_NUMBER__&
|
660
|
+
AllVars=All&
|
661
|
+
VarName=DEPARTURES_SCHEDULED&
|
662
|
+
VarDesc=DepScheduled&
|
663
|
+
VarType=Num&
|
664
|
+
VarName=DEPARTURES_PERFORMED&
|
665
|
+
VarDesc=DepPerformed&
|
666
|
+
VarType=Num&
|
667
|
+
VarName=PAYLOAD&
|
668
|
+
VarDesc=Payload&
|
669
|
+
VarType=Num&
|
670
|
+
VarName=SEATS&
|
671
|
+
VarDesc=Seats&
|
672
|
+
VarType=Num&
|
673
|
+
VarName=PASSENGERS&
|
674
|
+
VarDesc=Passengers&
|
675
|
+
VarType=Num&
|
676
|
+
VarName=FREIGHT&
|
677
|
+
VarDesc=Freight&
|
678
|
+
VarType=Num&
|
679
|
+
VarName=MAIL&
|
680
|
+
VarDesc=Mail&
|
681
|
+
VarType=Num&
|
682
|
+
VarName=DISTANCE&
|
683
|
+
VarDesc=Distance&
|
684
|
+
VarType=Num&
|
685
|
+
VarName=RAMP_TO_RAMP&
|
686
|
+
VarDesc=RampToRamp&
|
687
|
+
VarType=Num&
|
688
|
+
VarName=AIR_TIME&
|
689
|
+
VarDesc=AirTime&
|
690
|
+
VarType=Num&
|
691
|
+
VarName=UNIQUE_CARRIER&
|
692
|
+
VarDesc=UniqueCarrier&
|
693
|
+
VarType=Char&
|
694
|
+
VarName=AIRLINE_ID&
|
695
|
+
VarDesc=AirlineID&
|
696
|
+
VarType=Num&
|
697
|
+
VarName=UNIQUE_CARRIER_NAME&
|
698
|
+
VarDesc=UniqueCarrierName&
|
699
|
+
VarType=Char&
|
700
|
+
VarName=UNIQUE_CARRIER_ENTITY&
|
701
|
+
VarDesc=UniqCarrierEntity&
|
702
|
+
VarType=Char&
|
703
|
+
VarName=REGION&
|
704
|
+
VarDesc=CarrierRegion&
|
705
|
+
VarType=Char&
|
706
|
+
VarName=CARRIER&
|
707
|
+
VarDesc=Carrier&
|
708
|
+
VarType=Char&
|
709
|
+
VarName=CARRIER_NAME&
|
710
|
+
VarDesc=CarrierName&
|
711
|
+
VarType=Char&
|
712
|
+
VarName=CARRIER_GROUP&
|
713
|
+
VarDesc=CarrierGroup&
|
714
|
+
VarType=Num&
|
715
|
+
VarName=CARRIER_GROUP_NEW&
|
716
|
+
VarDesc=CarrierGroupNew&
|
717
|
+
VarType=Num&
|
718
|
+
VarName=ORIGIN&
|
719
|
+
VarDesc=Origin&
|
720
|
+
VarType=Char&
|
721
|
+
VarName=ORIGIN_CITY_NAME&
|
722
|
+
VarDesc=OriginCityName&
|
723
|
+
VarType=Char&
|
724
|
+
VarName=ORIGIN_CITY_NUM&
|
725
|
+
VarDesc=OriginCityNum&
|
726
|
+
VarType=Num&
|
727
|
+
VarName=ORIGIN_STATE_ABR&
|
728
|
+
VarDesc=OriginState&
|
729
|
+
VarType=Char&
|
730
|
+
VarName=ORIGIN_STATE_FIPS&
|
731
|
+
VarDesc=OriginStateFips&
|
732
|
+
VarType=Char&
|
733
|
+
VarName=ORIGIN_STATE_NM&
|
734
|
+
VarDesc=OriginStateName&
|
735
|
+
VarType=Char&
|
736
|
+
VarName=ORIGIN_COUNTRY&
|
737
|
+
VarDesc=OriginCountry&
|
738
|
+
VarType=Char&
|
739
|
+
VarName=ORIGIN_COUNTRY_NAME&
|
740
|
+
VarDesc=OriginCountryName&
|
741
|
+
VarType=Char&
|
742
|
+
VarName=ORIGIN_WAC&
|
743
|
+
VarDesc=OriginWac&
|
744
|
+
VarType=Num&
|
745
|
+
VarName=DEST&
|
746
|
+
VarDesc=Dest&
|
747
|
+
VarType=Char&
|
748
|
+
VarName=DEST_CITY_NAME&
|
749
|
+
VarDesc=DestCityName&
|
750
|
+
VarType=Char&
|
751
|
+
VarName=DEST_CITY_NUM&
|
752
|
+
VarDesc=DestCityNum&
|
753
|
+
VarType=Num&
|
754
|
+
VarName=DEST_STATE_ABR&
|
755
|
+
VarDesc=DestState&
|
756
|
+
VarType=Char&
|
757
|
+
VarName=DEST_STATE_FIPS&
|
758
|
+
VarDesc=DestStateFips&
|
759
|
+
VarType=Char&
|
760
|
+
VarName=DEST_STATE_NM&
|
761
|
+
VarDesc=DestStateName&
|
762
|
+
VarType=Char&
|
763
|
+
VarName=DEST_COUNTRY&
|
764
|
+
VarDesc=DestCountry&
|
765
|
+
VarType=Char&
|
766
|
+
VarName=DEST_COUNTRY_NAME&
|
767
|
+
VarDesc=DestCountryName&
|
768
|
+
VarType=Char&
|
769
|
+
VarName=DEST_WAC&
|
770
|
+
VarDesc=DestWac&
|
771
|
+
VarType=Num&
|
772
|
+
VarName=AIRCRAFT_GROUP&
|
773
|
+
VarDesc=AircraftGroup&
|
774
|
+
VarType=Num&
|
775
|
+
VarName=AIRCRAFT_TYPE&
|
776
|
+
VarDesc=AircraftType&
|
777
|
+
VarType=Char&
|
778
|
+
VarName=AIRCRAFT_CONFIG&
|
779
|
+
VarDesc=AircraftConfig&
|
780
|
+
VarType=Num&
|
781
|
+
VarName=YEAR&
|
782
|
+
VarDesc=Year&
|
783
|
+
VarType=Num&
|
784
|
+
VarName=QUARTER&
|
785
|
+
VarDesc=Quarter&
|
786
|
+
VarType=Num&
|
787
|
+
VarName=MONTH&
|
788
|
+
VarDesc=Month&
|
789
|
+
VarType=Num&
|
790
|
+
VarName=DISTANCE_GROUP&
|
791
|
+
VarDesc=DistanceGroup&
|
792
|
+
VarType=Num&
|
793
|
+
VarName=CLASS&
|
794
|
+
VarDesc=Class&
|
795
|
+
VarType=Char&
|
796
|
+
VarName=DATA_SOURCE&
|
797
|
+
VarDesc=DataSource&
|
798
|
+
VarType=Char
|
799
|
+
}.gsub /[\s]+/,''
|
800
|
+
|
801
|
+
data_miner do
|
802
|
+
months = Hash.new
|
803
|
+
# (2008..2009).each do |year|
|
804
|
+
(2008..2008).each do |year|
|
805
|
+
# (1..12).each do |month|
|
806
|
+
(1..1).each do |month|
|
807
|
+
time = Time.gm year, month
|
808
|
+
form_data = FORM_DATA.dup
|
809
|
+
form_data.gsub! '__YEAR__', time.year.to_s
|
810
|
+
form_data.gsub! '__MONTH_NUMBER__', time.month.to_s
|
811
|
+
form_data.gsub! '__MONTH_NAME__', time.strftime('%B')
|
812
|
+
months[time] = form_data
|
813
|
+
end
|
814
|
+
end
|
815
|
+
months.each do |month, form_data|
|
816
|
+
import "T100 data from #{month.strftime('%B %Y')}",
|
817
|
+
:url => URL,
|
818
|
+
:form_data => form_data,
|
819
|
+
:compression => :zip,
|
820
|
+
:glob => '/*.csv' do
|
821
|
+
key 'row_hash'
|
822
|
+
store 'departures_scheduled', :field_name => 'DEPARTURES_SCHEDULED'
|
823
|
+
store 'departures_performed', :field_name => 'DEPARTURES_PERFORMED'
|
824
|
+
store 'payload', :field_name => 'PAYLOAD', :from_units => :pounds, :to_units => :kilograms
|
825
|
+
store 'seats', :field_name => 'SEATS'
|
826
|
+
store 'passengers', :field_name => 'PASSENGERS'
|
827
|
+
store 'freight', :field_name => 'FREIGHT', :from_units => :pounds, :to_units => :kilograms
|
828
|
+
store 'mail', :field_name => 'MAIL', :from_units => :pounds, :to_units => :kilograms
|
829
|
+
store 'distance', :field_name => 'DISTANCE', :from_units => :miles, :to_units => :kilometres
|
830
|
+
store 'ramp_to_ramp', :field_name => 'RAMP_TO_RAMP'
|
831
|
+
store 'air_time', :field_name => 'AIR_TIME'
|
832
|
+
store 'unique_carrier', :field_name => 'UNIQUE_CARRIER'
|
833
|
+
store 'dot_airline_id', :field_name => 'AIRLINE_ID'
|
834
|
+
store 'unique_carrier_name', :field_name => 'UNIQUE_CARRIER_NAME'
|
835
|
+
store 'unique_carrier_entity', :field_name => 'UNIQUE_CARRIER_ENTITY'
|
836
|
+
store 'region', :field_name => 'REGION'
|
837
|
+
store 'carrier', :field_name => 'CARRIER'
|
838
|
+
store 'carrier_name', :field_name => 'CARRIER_NAME'
|
839
|
+
store 'carrier_group', :field_name => 'CARRIER_GROUP'
|
840
|
+
store 'carrier_group_new', :field_name => 'CARRIER_GROUP_NEW'
|
841
|
+
store 'origin_airport_iata', :field_name => 'ORIGIN'
|
842
|
+
store 'origin_city_name', :field_name => 'ORIGIN_CITY_NAME'
|
843
|
+
store 'origin_city_num', :field_name => 'ORIGIN_CITY_NUM'
|
844
|
+
store 'origin_state_abr', :field_name => 'ORIGIN_STATE_ABR'
|
845
|
+
store 'origin_state_fips', :field_name => 'ORIGIN_STATE_FIPS'
|
846
|
+
store 'origin_state_nm', :field_name => 'ORIGIN_STATE_NM'
|
847
|
+
store 'origin_country_iso_3166', :field_name => 'ORIGIN_COUNTRY'
|
848
|
+
store 'origin_country_name', :field_name => 'ORIGIN_COUNTRY_NAME'
|
849
|
+
store 'origin_wac', :field_name => 'ORIGIN_WAC'
|
850
|
+
store 'dest_airport_iata', :field_name => 'DEST'
|
851
|
+
store 'dest_city_name', :field_name => 'DEST_CITY_NAME'
|
852
|
+
store 'dest_city_num', :field_name => 'DEST_CITY_NUM'
|
853
|
+
store 'dest_state_abr', :field_name => 'DEST_STATE_ABR'
|
854
|
+
store 'dest_state_fips', :field_name => 'DEST_STATE_FIPS'
|
855
|
+
store 'dest_state_nm', :field_name => 'DEST_STATE_NM'
|
856
|
+
store 'dest_country_iso_3166', :field_name => 'DEST_COUNTRY'
|
857
|
+
store 'dest_country_name', :field_name => 'DEST_COUNTRY_NAME'
|
858
|
+
store 'dest_wac', :field_name => 'DEST_WAC'
|
859
|
+
store 'bts_aircraft_group', :field_name => 'AIRCRAFT_GROUP'
|
860
|
+
store 'bts_aircraft_type', :field_name => 'AIRCRAFT_TYPE'
|
861
|
+
store 'bts_aircraft_config', :field_name => 'AIRCRAFT_CONFIG'
|
862
|
+
store 'year', :field_name => 'YEAR'
|
863
|
+
store 'quarter', :field_name => 'QUARTER'
|
864
|
+
store 'month', :field_name => 'MONTH'
|
865
|
+
store 'bts_distance_group', :field_name => 'DISTANCE_GROUP'
|
866
|
+
store 'bts_service_class', :field_name => 'CLASS'
|
867
|
+
store 'data_source', :field_name => 'DATA_SOURCE'
|
868
|
+
end
|
869
|
+
end
|
870
|
+
|
871
|
+
process 'Derive freight share as a fraction of payload' do
|
872
|
+
update_all 'freight_share = (freight + mail) / payload', 'payload > 0'
|
873
|
+
end
|
874
|
+
|
875
|
+
process 'Derive load factor, which is passengers divided by the total seats available' do
|
876
|
+
update_all 'load_factor = passengers / seats', 'passengers <= seats'
|
877
|
+
end
|
878
|
+
|
879
|
+
process 'Derive average seats per departure' do
|
880
|
+
update_all 'seats_per_departure = seats / departures_performed', 'departures_performed > 0'
|
881
|
+
end
|
882
|
+
end
|
883
|
+
end
|
884
|
+
|
639
885
|
# todo: have somebody properly organize these
|
640
886
|
class DataMinerTest < Test::Unit::TestCase
|
641
887
|
if ENV['FAST'] == 'true'
|
@@ -733,9 +979,14 @@ class DataMinerTest < Test::Unit::TestCase
|
|
733
979
|
assert AutomobileVariant.count('make_name LIKE "%tesla"') > 0
|
734
980
|
end
|
735
981
|
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
982
|
+
should "mine T100 flight segments" do
|
983
|
+
T100FlightSegment.run_data_miner!
|
984
|
+
assert T100FlightSegment.count('dest_country_name LIKE "%United States"') > 0
|
985
|
+
end
|
986
|
+
|
987
|
+
should "mine residence survey day" do
|
988
|
+
ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
|
989
|
+
assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
|
990
|
+
end
|
740
991
|
end
|
741
992
|
end
|
data/test/test_helper.rb
CHANGED
@@ -18,6 +18,71 @@ class Test::Unit::TestCase
|
|
18
18
|
end
|
19
19
|
|
20
20
|
ActiveRecord::Schema.define(:version => 20090819143429) do
|
21
|
+
create_table "t100_flight_segments", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
22
|
+
t.integer "departures_performed"
|
23
|
+
t.integer "payload"
|
24
|
+
t.integer "seats"
|
25
|
+
t.integer "passengers"
|
26
|
+
t.integer "freight"
|
27
|
+
t.integer "mail"
|
28
|
+
t.integer "ramp_to_ramp"
|
29
|
+
t.integer "air_time"
|
30
|
+
t.float "load_factor"
|
31
|
+
t.float "freight_share"
|
32
|
+
t.integer "distance"
|
33
|
+
t.integer "departures_scheduled"
|
34
|
+
t.string "unique_carrier"
|
35
|
+
t.integer "dot_airline_id"
|
36
|
+
t.string "unique_carrier_name"
|
37
|
+
t.string "unique_carrier_entity"
|
38
|
+
t.string "region"
|
39
|
+
t.string "carrier"
|
40
|
+
t.string "carrier_name"
|
41
|
+
t.integer "carrier_group"
|
42
|
+
t.integer "carrier_group_new"
|
43
|
+
t.string "origin_airport_iata"
|
44
|
+
t.string "origin_city_name"
|
45
|
+
t.integer "origin_city_num"
|
46
|
+
t.string "origin_state_abr"
|
47
|
+
t.string "origin_state_fips"
|
48
|
+
t.string "origin_state_nm"
|
49
|
+
t.string "origin_country_iso_3166"
|
50
|
+
t.string "origin_country_name"
|
51
|
+
t.integer "origin_wac"
|
52
|
+
t.string "dest_airport_iata"
|
53
|
+
t.string "dest_city_name"
|
54
|
+
t.integer "dest_city_num"
|
55
|
+
t.string "dest_state_abr"
|
56
|
+
t.string "dest_state_fips"
|
57
|
+
t.string "dest_state_nm"
|
58
|
+
t.string "dest_country_iso_3166"
|
59
|
+
t.string "dest_country_name"
|
60
|
+
t.integer "dest_wac"
|
61
|
+
t.integer "bts_aircraft_group"
|
62
|
+
t.integer "bts_aircraft_type"
|
63
|
+
t.integer "bts_aircraft_config"
|
64
|
+
t.integer "year"
|
65
|
+
t.integer "quarter"
|
66
|
+
t.integer "month"
|
67
|
+
t.integer "bts_distance_group"
|
68
|
+
t.string "bts_service_class"
|
69
|
+
t.string "data_source"
|
70
|
+
t.float "seats_per_departure"
|
71
|
+
|
72
|
+
t.string 'payload_units'
|
73
|
+
t.string 'freight_units'
|
74
|
+
t.string 'mail_units'
|
75
|
+
t.string 'distance_units'
|
76
|
+
|
77
|
+
t.datetime "created_at"
|
78
|
+
t.datetime "updated_at"
|
79
|
+
|
80
|
+
t.string "row_hash"
|
81
|
+
t.integer 'data_miner_touch_count'
|
82
|
+
t.integer 'data_miner_last_run_id'
|
83
|
+
end
|
84
|
+
execute 'ALTER TABLE t100_flight_segments ADD PRIMARY KEY (row_hash);'
|
85
|
+
|
21
86
|
create_table 'airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
22
87
|
t.string 'iata_code'
|
23
88
|
t.string 'name'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.4
|
4
|
+
version: 0.4.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2010-03-
|
13
|
+
date: 2010-03-26 00:00:00 -04:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
requirements:
|
22
22
|
- - ~>
|
23
23
|
- !ruby/object:Gem::Version
|
24
|
-
version: 0.2.
|
24
|
+
version: 0.2.6
|
25
25
|
version:
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: activerecord
|