data_miner 0.4.4 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/data_miner.gemspec +5 -5
- data/lib/data_miner/attribute.rb +1 -1
- data/lib/data_miner/configuration.rb +5 -5
- data/lib/data_miner/import.rb +3 -3
- data/lib/data_miner.rb +12 -1
- data/test/data_miner_test.rb +256 -5
- data/test/test_helper.rb +65 -0
- metadata +3 -3
data/Rakefile
CHANGED
@@ -10,7 +10,7 @@ begin
|
|
10
10
|
gem.email = "seamus@abshere.net"
|
11
11
|
gem.homepage = "http://github.com/seamusabshere/data_miner"
|
12
12
|
gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
13
|
-
gem.add_dependency 'remote_table', '~>0.2.
|
13
|
+
gem.add_dependency 'remote_table', '~>0.2.6'
|
14
14
|
gem.add_dependency 'activerecord', '~>2.3.4'
|
15
15
|
gem.add_dependency 'activesupport', '~>2.3.4'
|
16
16
|
gem.add_dependency 'andand', '~>1.3.1'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.4
|
1
|
+
0.4.5
|
data/data_miner.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{data_miner}
|
8
|
-
s.version = "0.4.4"
|
8
|
+
s.version = "0.4.5"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-03-
|
12
|
+
s.date = %q{2010-03-26}
|
13
13
|
s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -51,7 +51,7 @@ Gem::Specification.new do |s|
|
|
51
51
|
s.specification_version = 3
|
52
52
|
|
53
53
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
54
|
-
s.add_runtime_dependency(%q<remote_table>, ["~> 0.2.
|
54
|
+
s.add_runtime_dependency(%q<remote_table>, ["~> 0.2.6"])
|
55
55
|
s.add_runtime_dependency(%q<activerecord>, ["~> 2.3.4"])
|
56
56
|
s.add_runtime_dependency(%q<activesupport>, ["~> 2.3.4"])
|
57
57
|
s.add_runtime_dependency(%q<andand>, ["~> 1.3.1"])
|
@@ -60,7 +60,7 @@ Gem::Specification.new do |s|
|
|
60
60
|
s.add_runtime_dependency(%q<blockenspiel>, ["~> 0.3.2"])
|
61
61
|
s.add_runtime_dependency(%q<log4r>, ["~> 1.1.7"])
|
62
62
|
else
|
63
|
-
s.add_dependency(%q<remote_table>, ["~> 0.2.
|
63
|
+
s.add_dependency(%q<remote_table>, ["~> 0.2.6"])
|
64
64
|
s.add_dependency(%q<activerecord>, ["~> 2.3.4"])
|
65
65
|
s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
|
66
66
|
s.add_dependency(%q<andand>, ["~> 1.3.1"])
|
@@ -70,7 +70,7 @@ Gem::Specification.new do |s|
|
|
70
70
|
s.add_dependency(%q<log4r>, ["~> 1.1.7"])
|
71
71
|
end
|
72
72
|
else
|
73
|
-
s.add_dependency(%q<remote_table>, ["~> 0.2.
|
73
|
+
s.add_dependency(%q<remote_table>, ["~> 0.2.6"])
|
74
74
|
s.add_dependency(%q<activerecord>, ["~> 2.3.4"])
|
75
75
|
s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
|
76
76
|
s.add_dependency(%q<andand>, ["~> 1.3.1"])
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -83,7 +83,7 @@ module DataMiner
|
|
83
83
|
end
|
84
84
|
|
85
85
|
def do_convert(row, value)
|
86
|
-
|
86
|
+
DataMiner.log_or_raise "If you use :from_units, you need to set :to_units (#{resource.name}##{name})" unless wants_units?
|
87
87
|
value.to_f.convert((from_units || unit_from_source(row)), to_units)
|
88
88
|
end
|
89
89
|
|
@@ -72,11 +72,11 @@ module DataMiner
|
|
72
72
|
import_runnables.each do |runnable|
|
73
73
|
runnable.attributes.each do |_, attribute|
|
74
74
|
if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
|
75
|
-
DataMiner.
|
75
|
+
DataMiner.log_or_raise %{
|
76
76
|
|
77
77
|
================================
|
78
78
|
|
79
|
-
|
79
|
+
You don't have a valid unit definition for #{resource.name}##{attribute.name}.
|
80
80
|
|
81
81
|
You supplied #{attribute.options.keys.select { |k, _| k.to_s =~ /unit/ }.map(&:to_sym).inspect }.
|
82
82
|
|
@@ -93,7 +93,7 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
|
|
93
93
|
missing_columns = Array.new
|
94
94
|
import_runnables.each do |runnable|
|
95
95
|
runnable.attributes.each do |_, attribute|
|
96
|
-
DataMiner.
|
96
|
+
DataMiner.log_or_raise "You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
|
97
97
|
unless resource.column_names.include? attribute.name
|
98
98
|
missing_columns << attribute.name
|
99
99
|
end
|
@@ -104,11 +104,11 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
|
|
104
104
|
end
|
105
105
|
missing_columns.uniq!
|
106
106
|
if missing_columns.any?
|
107
|
-
DataMiner.
|
107
|
+
DataMiner.log_or_raise %{
|
108
108
|
|
109
109
|
================================
|
110
110
|
|
111
|
-
|
111
|
+
On #{resource}, it looks like you're missing some columns...
|
112
112
|
|
113
113
|
Please run this...
|
114
114
|
|
data/lib/data_miner/import.rb
CHANGED
@@ -16,7 +16,7 @@ module DataMiner
|
|
16
16
|
@position_in_run = position_in_run
|
17
17
|
@description = description
|
18
18
|
@errata = Errata.new(:url => options[:errata], :klass => resource) if options[:errata]
|
19
|
-
@table = RemoteTable.new(options.slice(:url, :filename, :
|
19
|
+
@table = RemoteTable.new(options.slice(:url, :filename, :form_data, :format, :skip, :cut, :schema, :schema_name, :trap, :select, :reject, :sheet, :delimiter, :headers, :transform, :crop, :encoding, :compression, :glob))
|
20
20
|
end
|
21
21
|
|
22
22
|
def inspect
|
@@ -28,12 +28,12 @@ module DataMiner
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def store(attr_name, attr_options = {})
|
31
|
-
DataMiner.
|
31
|
+
DataMiner.log_or_raise "You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
|
32
32
|
attributes[attr_name] = Attribute.new self, attr_name, attr_options
|
33
33
|
end
|
34
34
|
|
35
35
|
def key(attr_name, attr_options = {})
|
36
|
-
DataMiner.
|
36
|
+
DataMiner.log_or_raise "You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
|
37
37
|
@key = attr_name
|
38
38
|
store attr_name, attr_options
|
39
39
|
end
|
data/lib/data_miner.rb
CHANGED
@@ -32,6 +32,17 @@ module DataMiner
|
|
32
32
|
|
33
33
|
self.logger = Logger.new 'data_miner'
|
34
34
|
logger.add info_outputter, error_outputter
|
35
|
+
ActiveRecord::Base.logger = logger
|
36
|
+
ActiveRecord::Base.colorize_logging = false
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def self.log_or_raise(message)
|
41
|
+
message = "[data_miner gem] #{message}"
|
42
|
+
if ENV['RAILS_ENV'] == 'production'
|
43
|
+
logger.error message
|
44
|
+
else
|
45
|
+
raise message
|
35
46
|
end
|
36
47
|
end
|
37
48
|
|
@@ -51,7 +62,7 @@ end
|
|
51
62
|
ActiveRecord::Base.class_eval do
|
52
63
|
def self.data_miner(&block)
|
53
64
|
unless table_exists?
|
54
|
-
|
65
|
+
DataMiner.log_or_raise "Database table `#{table_name}` doesn't exist. DataMiner probably won't work properly until you run a migration or otherwise fix the schema."
|
55
66
|
return
|
56
67
|
end
|
57
68
|
|
data/test/data_miner_test.rb
CHANGED
@@ -481,7 +481,7 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
|
481
481
|
|
482
482
|
# conversions are NOT performed here, since we first have to zero out legitimate skips
|
483
483
|
# otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
|
484
|
-
import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv'
|
484
|
+
import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv' do
|
485
485
|
key 'department_of_energy_identifier', :field_name => 'DOEID'
|
486
486
|
|
487
487
|
store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
|
@@ -636,6 +636,252 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
|
636
636
|
end
|
637
637
|
end
|
638
638
|
|
639
|
+
# T-100 Segment (All Carriers): http://www.transtats.bts.gov/Fields.asp?Table_ID=293
|
640
|
+
class T100FlightSegment < ActiveRecord::Base
|
641
|
+
set_primary_key :row_hash
|
642
|
+
URL = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
|
643
|
+
FORM_DATA = %{
|
644
|
+
UserTableName=T_100_Segment__All_Carriers&
|
645
|
+
DBShortName=Air_Carriers&
|
646
|
+
RawDataTable=T_T100_SEGMENT_ALL_CARRIER&
|
647
|
+
sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D__MONTH_NUMBER__+AND+YEAR%3D__YEAR__&
|
648
|
+
varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&
|
649
|
+
grouplist=&
|
650
|
+
suml=&
|
651
|
+
sumRegion=&
|
652
|
+
filter1=title%3D&
|
653
|
+
filter2=title%3D&
|
654
|
+
geo=All%A0&
|
655
|
+
time=__MONTH_NAME__&
|
656
|
+
timename=Month&
|
657
|
+
GEOGRAPHY=All&
|
658
|
+
XYEAR=__YEAR__&
|
659
|
+
FREQUENCY=__MONTH_NUMBER__&
|
660
|
+
AllVars=All&
|
661
|
+
VarName=DEPARTURES_SCHEDULED&
|
662
|
+
VarDesc=DepScheduled&
|
663
|
+
VarType=Num&
|
664
|
+
VarName=DEPARTURES_PERFORMED&
|
665
|
+
VarDesc=DepPerformed&
|
666
|
+
VarType=Num&
|
667
|
+
VarName=PAYLOAD&
|
668
|
+
VarDesc=Payload&
|
669
|
+
VarType=Num&
|
670
|
+
VarName=SEATS&
|
671
|
+
VarDesc=Seats&
|
672
|
+
VarType=Num&
|
673
|
+
VarName=PASSENGERS&
|
674
|
+
VarDesc=Passengers&
|
675
|
+
VarType=Num&
|
676
|
+
VarName=FREIGHT&
|
677
|
+
VarDesc=Freight&
|
678
|
+
VarType=Num&
|
679
|
+
VarName=MAIL&
|
680
|
+
VarDesc=Mail&
|
681
|
+
VarType=Num&
|
682
|
+
VarName=DISTANCE&
|
683
|
+
VarDesc=Distance&
|
684
|
+
VarType=Num&
|
685
|
+
VarName=RAMP_TO_RAMP&
|
686
|
+
VarDesc=RampToRamp&
|
687
|
+
VarType=Num&
|
688
|
+
VarName=AIR_TIME&
|
689
|
+
VarDesc=AirTime&
|
690
|
+
VarType=Num&
|
691
|
+
VarName=UNIQUE_CARRIER&
|
692
|
+
VarDesc=UniqueCarrier&
|
693
|
+
VarType=Char&
|
694
|
+
VarName=AIRLINE_ID&
|
695
|
+
VarDesc=AirlineID&
|
696
|
+
VarType=Num&
|
697
|
+
VarName=UNIQUE_CARRIER_NAME&
|
698
|
+
VarDesc=UniqueCarrierName&
|
699
|
+
VarType=Char&
|
700
|
+
VarName=UNIQUE_CARRIER_ENTITY&
|
701
|
+
VarDesc=UniqCarrierEntity&
|
702
|
+
VarType=Char&
|
703
|
+
VarName=REGION&
|
704
|
+
VarDesc=CarrierRegion&
|
705
|
+
VarType=Char&
|
706
|
+
VarName=CARRIER&
|
707
|
+
VarDesc=Carrier&
|
708
|
+
VarType=Char&
|
709
|
+
VarName=CARRIER_NAME&
|
710
|
+
VarDesc=CarrierName&
|
711
|
+
VarType=Char&
|
712
|
+
VarName=CARRIER_GROUP&
|
713
|
+
VarDesc=CarrierGroup&
|
714
|
+
VarType=Num&
|
715
|
+
VarName=CARRIER_GROUP_NEW&
|
716
|
+
VarDesc=CarrierGroupNew&
|
717
|
+
VarType=Num&
|
718
|
+
VarName=ORIGIN&
|
719
|
+
VarDesc=Origin&
|
720
|
+
VarType=Char&
|
721
|
+
VarName=ORIGIN_CITY_NAME&
|
722
|
+
VarDesc=OriginCityName&
|
723
|
+
VarType=Char&
|
724
|
+
VarName=ORIGIN_CITY_NUM&
|
725
|
+
VarDesc=OriginCityNum&
|
726
|
+
VarType=Num&
|
727
|
+
VarName=ORIGIN_STATE_ABR&
|
728
|
+
VarDesc=OriginState&
|
729
|
+
VarType=Char&
|
730
|
+
VarName=ORIGIN_STATE_FIPS&
|
731
|
+
VarDesc=OriginStateFips&
|
732
|
+
VarType=Char&
|
733
|
+
VarName=ORIGIN_STATE_NM&
|
734
|
+
VarDesc=OriginStateName&
|
735
|
+
VarType=Char&
|
736
|
+
VarName=ORIGIN_COUNTRY&
|
737
|
+
VarDesc=OriginCountry&
|
738
|
+
VarType=Char&
|
739
|
+
VarName=ORIGIN_COUNTRY_NAME&
|
740
|
+
VarDesc=OriginCountryName&
|
741
|
+
VarType=Char&
|
742
|
+
VarName=ORIGIN_WAC&
|
743
|
+
VarDesc=OriginWac&
|
744
|
+
VarType=Num&
|
745
|
+
VarName=DEST&
|
746
|
+
VarDesc=Dest&
|
747
|
+
VarType=Char&
|
748
|
+
VarName=DEST_CITY_NAME&
|
749
|
+
VarDesc=DestCityName&
|
750
|
+
VarType=Char&
|
751
|
+
VarName=DEST_CITY_NUM&
|
752
|
+
VarDesc=DestCityNum&
|
753
|
+
VarType=Num&
|
754
|
+
VarName=DEST_STATE_ABR&
|
755
|
+
VarDesc=DestState&
|
756
|
+
VarType=Char&
|
757
|
+
VarName=DEST_STATE_FIPS&
|
758
|
+
VarDesc=DestStateFips&
|
759
|
+
VarType=Char&
|
760
|
+
VarName=DEST_STATE_NM&
|
761
|
+
VarDesc=DestStateName&
|
762
|
+
VarType=Char&
|
763
|
+
VarName=DEST_COUNTRY&
|
764
|
+
VarDesc=DestCountry&
|
765
|
+
VarType=Char&
|
766
|
+
VarName=DEST_COUNTRY_NAME&
|
767
|
+
VarDesc=DestCountryName&
|
768
|
+
VarType=Char&
|
769
|
+
VarName=DEST_WAC&
|
770
|
+
VarDesc=DestWac&
|
771
|
+
VarType=Num&
|
772
|
+
VarName=AIRCRAFT_GROUP&
|
773
|
+
VarDesc=AircraftGroup&
|
774
|
+
VarType=Num&
|
775
|
+
VarName=AIRCRAFT_TYPE&
|
776
|
+
VarDesc=AircraftType&
|
777
|
+
VarType=Char&
|
778
|
+
VarName=AIRCRAFT_CONFIG&
|
779
|
+
VarDesc=AircraftConfig&
|
780
|
+
VarType=Num&
|
781
|
+
VarName=YEAR&
|
782
|
+
VarDesc=Year&
|
783
|
+
VarType=Num&
|
784
|
+
VarName=QUARTER&
|
785
|
+
VarDesc=Quarter&
|
786
|
+
VarType=Num&
|
787
|
+
VarName=MONTH&
|
788
|
+
VarDesc=Month&
|
789
|
+
VarType=Num&
|
790
|
+
VarName=DISTANCE_GROUP&
|
791
|
+
VarDesc=DistanceGroup&
|
792
|
+
VarType=Num&
|
793
|
+
VarName=CLASS&
|
794
|
+
VarDesc=Class&
|
795
|
+
VarType=Char&
|
796
|
+
VarName=DATA_SOURCE&
|
797
|
+
VarDesc=DataSource&
|
798
|
+
VarType=Char
|
799
|
+
}.gsub /[\s]+/,''
|
800
|
+
|
801
|
+
data_miner do
|
802
|
+
months = Hash.new
|
803
|
+
# (2008..2009).each do |year|
|
804
|
+
(2008..2008).each do |year|
|
805
|
+
# (1..12).each do |month|
|
806
|
+
(1..1).each do |month|
|
807
|
+
time = Time.gm year, month
|
808
|
+
form_data = FORM_DATA.dup
|
809
|
+
form_data.gsub! '__YEAR__', time.year.to_s
|
810
|
+
form_data.gsub! '__MONTH_NUMBER__', time.month.to_s
|
811
|
+
form_data.gsub! '__MONTH_NAME__', time.strftime('%B')
|
812
|
+
months[time] = form_data
|
813
|
+
end
|
814
|
+
end
|
815
|
+
months.each do |month, form_data|
|
816
|
+
import "T100 data from #{month.strftime('%B %Y')}",
|
817
|
+
:url => URL,
|
818
|
+
:form_data => form_data,
|
819
|
+
:compression => :zip,
|
820
|
+
:glob => '/*.csv' do
|
821
|
+
key 'row_hash'
|
822
|
+
store 'departures_scheduled', :field_name => 'DEPARTURES_SCHEDULED'
|
823
|
+
store 'departures_performed', :field_name => 'DEPARTURES_PERFORMED'
|
824
|
+
store 'payload', :field_name => 'PAYLOAD', :from_units => :pounds, :to_units => :kilograms
|
825
|
+
store 'seats', :field_name => 'SEATS'
|
826
|
+
store 'passengers', :field_name => 'PASSENGERS'
|
827
|
+
store 'freight', :field_name => 'FREIGHT', :from_units => :pounds, :to_units => :kilograms
|
828
|
+
store 'mail', :field_name => 'MAIL', :from_units => :pounds, :to_units => :kilograms
|
829
|
+
store 'distance', :field_name => 'DISTANCE', :from_units => :miles, :to_units => :kilometres
|
830
|
+
store 'ramp_to_ramp', :field_name => 'RAMP_TO_RAMP'
|
831
|
+
store 'air_time', :field_name => 'AIR_TIME'
|
832
|
+
store 'unique_carrier', :field_name => 'UNIQUE_CARRIER'
|
833
|
+
store 'dot_airline_id', :field_name => 'AIRLINE_ID'
|
834
|
+
store 'unique_carrier_name', :field_name => 'UNIQUE_CARRIER_NAME'
|
835
|
+
store 'unique_carrier_entity', :field_name => 'UNIQUE_CARRIER_ENTITY'
|
836
|
+
store 'region', :field_name => 'REGION'
|
837
|
+
store 'carrier', :field_name => 'CARRIER'
|
838
|
+
store 'carrier_name', :field_name => 'CARRIER_NAME'
|
839
|
+
store 'carrier_group', :field_name => 'CARRIER_GROUP'
|
840
|
+
store 'carrier_group_new', :field_name => 'CARRIER_GROUP_NEW'
|
841
|
+
store 'origin_airport_iata', :field_name => 'ORIGIN'
|
842
|
+
store 'origin_city_name', :field_name => 'ORIGIN_CITY_NAME'
|
843
|
+
store 'origin_city_num', :field_name => 'ORIGIN_CITY_NUM'
|
844
|
+
store 'origin_state_abr', :field_name => 'ORIGIN_STATE_ABR'
|
845
|
+
store 'origin_state_fips', :field_name => 'ORIGIN_STATE_FIPS'
|
846
|
+
store 'origin_state_nm', :field_name => 'ORIGIN_STATE_NM'
|
847
|
+
store 'origin_country_iso_3166', :field_name => 'ORIGIN_COUNTRY'
|
848
|
+
store 'origin_country_name', :field_name => 'ORIGIN_COUNTRY_NAME'
|
849
|
+
store 'origin_wac', :field_name => 'ORIGIN_WAC'
|
850
|
+
store 'dest_airport_iata', :field_name => 'DEST'
|
851
|
+
store 'dest_city_name', :field_name => 'DEST_CITY_NAME'
|
852
|
+
store 'dest_city_num', :field_name => 'DEST_CITY_NUM'
|
853
|
+
store 'dest_state_abr', :field_name => 'DEST_STATE_ABR'
|
854
|
+
store 'dest_state_fips', :field_name => 'DEST_STATE_FIPS'
|
855
|
+
store 'dest_state_nm', :field_name => 'DEST_STATE_NM'
|
856
|
+
store 'dest_country_iso_3166', :field_name => 'DEST_COUNTRY'
|
857
|
+
store 'dest_country_name', :field_name => 'DEST_COUNTRY_NAME'
|
858
|
+
store 'dest_wac', :field_name => 'DEST_WAC'
|
859
|
+
store 'bts_aircraft_group', :field_name => 'AIRCRAFT_GROUP'
|
860
|
+
store 'bts_aircraft_type', :field_name => 'AIRCRAFT_TYPE'
|
861
|
+
store 'bts_aircraft_config', :field_name => 'AIRCRAFT_CONFIG'
|
862
|
+
store 'year', :field_name => 'YEAR'
|
863
|
+
store 'quarter', :field_name => 'QUARTER'
|
864
|
+
store 'month', :field_name => 'MONTH'
|
865
|
+
store 'bts_distance_group', :field_name => 'DISTANCE_GROUP'
|
866
|
+
store 'bts_service_class', :field_name => 'CLASS'
|
867
|
+
store 'data_source', :field_name => 'DATA_SOURCE'
|
868
|
+
end
|
869
|
+
end
|
870
|
+
|
871
|
+
process 'Derive freight share as a fraction of payload' do
|
872
|
+
update_all 'freight_share = (freight + mail) / payload', 'payload > 0'
|
873
|
+
end
|
874
|
+
|
875
|
+
process 'Derive load factor, which is passengers divided by the total seats available' do
|
876
|
+
update_all 'load_factor = passengers / seats', 'passengers <= seats'
|
877
|
+
end
|
878
|
+
|
879
|
+
process 'Derive average seats per departure' do
|
880
|
+
update_all 'seats_per_departure = seats / departures_performed', 'departures_performed > 0'
|
881
|
+
end
|
882
|
+
end
|
883
|
+
end
|
884
|
+
|
639
885
|
# todo: have somebody properly organize these
|
640
886
|
class DataMinerTest < Test::Unit::TestCase
|
641
887
|
if ENV['FAST'] == 'true'
|
@@ -733,9 +979,14 @@ class DataMinerTest < Test::Unit::TestCase
|
|
733
979
|
assert AutomobileVariant.count('make_name LIKE "%tesla"') > 0
|
734
980
|
end
|
735
981
|
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
982
|
+
should "mine T100 flight segments" do
|
983
|
+
T100FlightSegment.run_data_miner!
|
984
|
+
assert T100FlightSegment.count('dest_country_name LIKE "%United States"') > 0
|
985
|
+
end
|
986
|
+
|
987
|
+
should "mine residence survey day" do
|
988
|
+
ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
|
989
|
+
assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
|
990
|
+
end
|
740
991
|
end
|
741
992
|
end
|
data/test/test_helper.rb
CHANGED
@@ -18,6 +18,71 @@ class Test::Unit::TestCase
|
|
18
18
|
end
|
19
19
|
|
20
20
|
ActiveRecord::Schema.define(:version => 20090819143429) do
|
21
|
+
create_table "t100_flight_segments", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
22
|
+
t.integer "departures_performed"
|
23
|
+
t.integer "payload"
|
24
|
+
t.integer "seats"
|
25
|
+
t.integer "passengers"
|
26
|
+
t.integer "freight"
|
27
|
+
t.integer "mail"
|
28
|
+
t.integer "ramp_to_ramp"
|
29
|
+
t.integer "air_time"
|
30
|
+
t.float "load_factor"
|
31
|
+
t.float "freight_share"
|
32
|
+
t.integer "distance"
|
33
|
+
t.integer "departures_scheduled"
|
34
|
+
t.string "unique_carrier"
|
35
|
+
t.integer "dot_airline_id"
|
36
|
+
t.string "unique_carrier_name"
|
37
|
+
t.string "unique_carrier_entity"
|
38
|
+
t.string "region"
|
39
|
+
t.string "carrier"
|
40
|
+
t.string "carrier_name"
|
41
|
+
t.integer "carrier_group"
|
42
|
+
t.integer "carrier_group_new"
|
43
|
+
t.string "origin_airport_iata"
|
44
|
+
t.string "origin_city_name"
|
45
|
+
t.integer "origin_city_num"
|
46
|
+
t.string "origin_state_abr"
|
47
|
+
t.string "origin_state_fips"
|
48
|
+
t.string "origin_state_nm"
|
49
|
+
t.string "origin_country_iso_3166"
|
50
|
+
t.string "origin_country_name"
|
51
|
+
t.integer "origin_wac"
|
52
|
+
t.string "dest_airport_iata"
|
53
|
+
t.string "dest_city_name"
|
54
|
+
t.integer "dest_city_num"
|
55
|
+
t.string "dest_state_abr"
|
56
|
+
t.string "dest_state_fips"
|
57
|
+
t.string "dest_state_nm"
|
58
|
+
t.string "dest_country_iso_3166"
|
59
|
+
t.string "dest_country_name"
|
60
|
+
t.integer "dest_wac"
|
61
|
+
t.integer "bts_aircraft_group"
|
62
|
+
t.integer "bts_aircraft_type"
|
63
|
+
t.integer "bts_aircraft_config"
|
64
|
+
t.integer "year"
|
65
|
+
t.integer "quarter"
|
66
|
+
t.integer "month"
|
67
|
+
t.integer "bts_distance_group"
|
68
|
+
t.string "bts_service_class"
|
69
|
+
t.string "data_source"
|
70
|
+
t.float "seats_per_departure"
|
71
|
+
|
72
|
+
t.string 'payload_units'
|
73
|
+
t.string 'freight_units'
|
74
|
+
t.string 'mail_units'
|
75
|
+
t.string 'distance_units'
|
76
|
+
|
77
|
+
t.datetime "created_at"
|
78
|
+
t.datetime "updated_at"
|
79
|
+
|
80
|
+
t.string "row_hash"
|
81
|
+
t.integer 'data_miner_touch_count'
|
82
|
+
t.integer 'data_miner_last_run_id'
|
83
|
+
end
|
84
|
+
execute 'ALTER TABLE t100_flight_segments ADD PRIMARY KEY (row_hash);'
|
85
|
+
|
21
86
|
create_table 'airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
22
87
|
t.string 'iata_code'
|
23
88
|
t.string 'name'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.4
|
4
|
+
version: 0.4.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2010-03-
|
13
|
+
date: 2010-03-26 00:00:00 -04:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
requirements:
|
22
22
|
- - ~>
|
23
23
|
- !ruby/object:Gem::Version
|
24
|
-
version: 0.2.
|
24
|
+
version: 0.2.6
|
25
25
|
version:
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: activerecord
|