data_miner 1.3.8 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +42 -0
- data/Gemfile +19 -3
- data/README.rdoc +3 -3
- data/Rakefile +13 -15
- data/data_miner.gemspec +4 -15
- data/lib/data_miner.rb +69 -70
- data/lib/data_miner/active_record_extensions.rb +17 -22
- data/lib/data_miner/attribute.rb +176 -179
- data/lib/data_miner/dictionary.rb +38 -31
- data/lib/data_miner/run.rb +49 -18
- data/lib/data_miner/script.rb +116 -0
- data/lib/data_miner/step.rb +5 -0
- data/lib/data_miner/step/import.rb +74 -0
- data/lib/data_miner/step/process.rb +34 -0
- data/lib/data_miner/step/tap.rb +134 -0
- data/lib/data_miner/version.rb +1 -1
- data/test/helper.rb +26 -24
- data/test/support/breeds.xls +0 -0
- data/test/support/pet_color_dictionary.en.csv +5 -0
- data/test/support/pet_color_dictionary.es.csv +5 -0
- data/test/support/pets.csv +5 -0
- data/test/support/pets_funny.csv +4 -0
- data/test/test_data_miner.rb +103 -0
- data/test/test_earth_import.rb +25 -0
- data/test/test_earth_tap.rb +25 -0
- data/test/test_safety.rb +43 -0
- metadata +72 -78
- data/.document +0 -5
- data/lib/data_miner/config.rb +0 -124
- data/lib/data_miner/import.rb +0 -93
- data/lib/data_miner/process.rb +0 -38
- data/lib/data_miner/tap.rb +0 -143
- data/test/support/aircraft.rb +0 -102
- data/test/support/airport.rb +0 -16
- data/test/support/automobile_fuel_type.rb +0 -40
- data/test/support/automobile_variant.rb +0 -362
- data/test/support/country.rb +0 -15
- data/test/support/test_database.rb +0 -311
- data/test/test_data_miner_attribute.rb +0 -111
- data/test/test_data_miner_process.rb +0 -18
- data/test/test_old_syntax.rb +0 -825
- data/test/test_tap.rb +0 -21
@@ -1,111 +0,0 @@
|
|
1
|
-
$:.push File.dirname(__FILE__)
|
2
|
-
require 'helper'
|
3
|
-
|
4
|
-
TestDatabase.load_models
|
5
|
-
|
6
|
-
class TestDataMinerAttribute < Test::Unit::TestCase
|
7
|
-
context '#value_from_row' do
|
8
|
-
setup do
|
9
|
-
@airport = Airport.new
|
10
|
-
end
|
11
|
-
context 'nullify is true' do
|
12
|
-
setup do
|
13
|
-
@attribute = DataMiner::Attribute.new @airport, 'latitude', :nullify => true
|
14
|
-
end
|
15
|
-
should 'return nil if field is blank' do
|
16
|
-
assert_nil @attribute.value_from_row(
|
17
|
-
'name' => 'DTW',
|
18
|
-
'city' => 'Warren',
|
19
|
-
'country_name' => 'US',
|
20
|
-
'latitude' => '',
|
21
|
-
'longitude' => ''
|
22
|
-
)
|
23
|
-
end
|
24
|
-
should 'return the value if field is not blank' do
|
25
|
-
assert_equal '12.34', @attribute.value_from_row(
|
26
|
-
'name' => 'DTW',
|
27
|
-
'city' => 'Warren',
|
28
|
-
'country_name' => 'US',
|
29
|
-
'latitude' => '12.34',
|
30
|
-
'longitude' => ''
|
31
|
-
)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
context 'nullify is false' do
|
35
|
-
setup do
|
36
|
-
@attribute = DataMiner::Attribute.new @airport, 'latitude'
|
37
|
-
end
|
38
|
-
should 'return the value if field is not blank' do
|
39
|
-
assert_equal '12.34', @attribute.value_from_row(
|
40
|
-
'name' => 'DTW',
|
41
|
-
'city' => 'Warren',
|
42
|
-
'country_name' => 'US',
|
43
|
-
'latitude' => '12.34',
|
44
|
-
'longitude' => ''
|
45
|
-
)
|
46
|
-
end
|
47
|
-
should 'return blank if field is blank' do
|
48
|
-
assert_equal '', @attribute.value_from_row(
|
49
|
-
'name' => 'DTW',
|
50
|
-
'city' => 'Warren',
|
51
|
-
'country_name' => 'US',
|
52
|
-
'latitude' => '',
|
53
|
-
'longitude' => ''
|
54
|
-
)
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
context '#set_record_from_row' do
|
60
|
-
setup do
|
61
|
-
@automobile_fuel_type = AutomobileFuelType.new
|
62
|
-
end
|
63
|
-
context 'nullify is true, wants units' do
|
64
|
-
setup do
|
65
|
-
@attribute = DataMiner::Attribute.new @automobile_fuel_type, 'annual_distance', :nullify => true, :units_field_name => 'annual_distance_units'
|
66
|
-
end
|
67
|
-
should 'set value and units to nil if field is blank' do
|
68
|
-
@attribute.set_record_from_row(@automobile_fuel_type,
|
69
|
-
'name' => 'electricity',
|
70
|
-
'annual_distance' => '',
|
71
|
-
'annual_distance_units' => ''
|
72
|
-
)
|
73
|
-
assert_nil @automobile_fuel_type.annual_distance
|
74
|
-
assert_nil @automobile_fuel_type.annual_distance_units
|
75
|
-
end
|
76
|
-
should 'set value and units if field is not blank' do
|
77
|
-
@attribute.set_record_from_row(@automobile_fuel_type,
|
78
|
-
'name' => 'electricity',
|
79
|
-
'annual_distance' => '100.0',
|
80
|
-
'annual_distance_units' => 'kilometres'
|
81
|
-
)
|
82
|
-
assert_equal 100.0, @automobile_fuel_type.annual_distance
|
83
|
-
assert_equal 'kilometres', @automobile_fuel_type.annual_distance_units
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
context 'nullify is false, wants units' do
|
88
|
-
setup do
|
89
|
-
@attribute = DataMiner::Attribute.new @automobile_fuel_type, 'annual_distance', :units_field_name => 'annual_distance_units'
|
90
|
-
end
|
91
|
-
should 'set value and units to blank if field is blank' do
|
92
|
-
@attribute.set_record_from_row(@automobile_fuel_type,
|
93
|
-
'name' => 'electricity',
|
94
|
-
'annual_distance' => '',
|
95
|
-
'annual_distance_units' => ''
|
96
|
-
)
|
97
|
-
assert_equal 0.0, @automobile_fuel_type.annual_distance
|
98
|
-
assert_equal '', @automobile_fuel_type.annual_distance_units
|
99
|
-
end
|
100
|
-
should 'set value and units if field is not blank' do
|
101
|
-
@attribute.set_record_from_row(@automobile_fuel_type,
|
102
|
-
'name' => 'electricity',
|
103
|
-
'annual_distance' => '100.0',
|
104
|
-
'annual_distance_units' => 'kilometres'
|
105
|
-
)
|
106
|
-
assert_equal 100.0, @automobile_fuel_type.annual_distance
|
107
|
-
assert_equal 'kilometres', @automobile_fuel_type.annual_distance_units
|
108
|
-
end
|
109
|
-
end
|
110
|
-
end
|
111
|
-
end
|
@@ -1,18 +0,0 @@
|
|
1
|
-
$:.push File.dirname(__FILE__)
|
2
|
-
require 'helper'
|
3
|
-
|
4
|
-
class TestDataMinerProcess < Test::Unit::TestCase
|
5
|
-
context '#inspect' do
|
6
|
-
setup do
|
7
|
-
@config = DataMiner::Config.new 'foo'
|
8
|
-
end
|
9
|
-
should 'describe a block' do
|
10
|
-
process = DataMiner::Process.new(@config, 'something cool') { }
|
11
|
-
assert_match /something cool/, process.inspect
|
12
|
-
end
|
13
|
-
should 'describe a method' do
|
14
|
-
process = DataMiner::Process.new @config, :something_cool
|
15
|
-
assert_match /something_cool/, process.inspect
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
data/test/test_old_syntax.rb
DELETED
@@ -1,825 +0,0 @@
|
|
1
|
-
$:.push File.dirname(__FILE__)
|
2
|
-
require 'helper'
|
3
|
-
|
4
|
-
TestDatabase.load_models
|
5
|
-
|
6
|
-
class CensusRegion < ActiveRecord::Base
|
7
|
-
set_primary_key :number
|
8
|
-
|
9
|
-
data_miner do
|
10
|
-
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
|
11
|
-
key 'number', :field_name => 'Region'
|
12
|
-
store 'name', :field_name => 'Name'
|
13
|
-
end
|
14
|
-
|
15
|
-
# pretend this is a different data source
|
16
|
-
# fake! just for testing purposes
|
17
|
-
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
|
18
|
-
key 'number', :field_name => 'Region'
|
19
|
-
store 'name', :field_name => 'Name'
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
# smaller than a region
|
25
|
-
class CensusDivision < ActiveRecord::Base
|
26
|
-
set_primary_key :number
|
27
|
-
|
28
|
-
data_miner do
|
29
|
-
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
30
|
-
key 'number', :field_name => 'Division'
|
31
|
-
store 'name', :field_name => 'Name'
|
32
|
-
store 'census_region_number', :field_name => 'Region'
|
33
|
-
store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
class CensusDivisionDeux < ActiveRecord::Base
|
39
|
-
set_primary_key :number
|
40
|
-
|
41
|
-
data_miner do
|
42
|
-
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
43
|
-
key 'number', :field_name => 'Division'
|
44
|
-
store 'name', :field_name => 'Name'
|
45
|
-
store 'census_region_number', :field_name => 'Region'
|
46
|
-
store 'census_region_name', :field_name => 'Region', :dictionary => DataMiner::Dictionary.new(:input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv')
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
class CrosscallingCensusRegion < ActiveRecord::Base
|
52
|
-
set_primary_key :number
|
53
|
-
|
54
|
-
has_many :crosscalling_census_divisions
|
55
|
-
|
56
|
-
data_miner do
|
57
|
-
process "derive ourselves from the census divisions table (i.e., cross call census divisions)" do
|
58
|
-
CrosscallingCensusDivision.run_data_miner!
|
59
|
-
connection.create_table :crosscalling_census_regions, :options => 'ENGINE=InnoDB default charset=utf8', :id => false, :force => true do |t|
|
60
|
-
t.column :number, :integer
|
61
|
-
t.column :name, :string
|
62
|
-
end
|
63
|
-
connection.execute 'ALTER TABLE crosscalling_census_regions ADD PRIMARY KEY (number);'
|
64
|
-
connection.execute %{
|
65
|
-
INSERT IGNORE INTO crosscalling_census_regions(number, name)
|
66
|
-
SELECT crosscalling_census_divisions.census_region_number, crosscalling_census_divisions.census_region_name FROM crosscalling_census_divisions
|
67
|
-
}
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
class CrosscallingCensusDivision < ActiveRecord::Base
|
73
|
-
set_primary_key :number
|
74
|
-
|
75
|
-
belongs_to :crosscalling_census_regions, :foreign_key => 'census_region_number'
|
76
|
-
|
77
|
-
data_miner do
|
78
|
-
import "get a list of census divisions and their regions", :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
79
|
-
key 'number', :field_name => 'Division'
|
80
|
-
store 'name', :field_name => 'Name'
|
81
|
-
store 'census_region_number', :field_name => 'Region'
|
82
|
-
store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
|
83
|
-
end
|
84
|
-
|
85
|
-
process "make sure my parent object is set up (i.e., cross-call it)" do
|
86
|
-
CrosscallingCensusRegion.run_data_miner!
|
87
|
-
end
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
92
|
-
set_primary_key :department_of_energy_identifier
|
93
|
-
|
94
|
-
data_miner do
|
95
|
-
process 'Define some unit conversions' do
|
96
|
-
Conversions.register :kbtus, :joules, 1_000.0 * 1_055.05585
|
97
|
-
Conversions.register :square_feet, :square_metres, 0.09290304
|
98
|
-
end
|
99
|
-
|
100
|
-
# conversions are NOT performed here, since we first have to zero out legitimate skips
|
101
|
-
# otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
|
102
|
-
import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv' do
|
103
|
-
key 'department_of_energy_identifier', :field_name => 'DOEID'
|
104
|
-
|
105
|
-
store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
|
106
|
-
store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
|
107
|
-
store 'construction_period', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
|
108
|
-
store 'urbanity', :field_name => 'URBRUR', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/urbrur/urbrur.csv' }
|
109
|
-
store 'dishwasher_use', :field_name => 'DWASHUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dwashuse/dwashuse.csv' }
|
110
|
-
store 'central_ac_use', :field_name => 'USECENAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usecenac/usecenac.csv' }
|
111
|
-
store 'window_ac_use', :field_name => 'USEWWAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usewwac/usewwac.csv' }
|
112
|
-
store 'clothes_washer_use', :field_name => 'WASHLOAD', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/washload/washload.csv' }
|
113
|
-
store 'clothes_dryer_use', :field_name => 'DRYRUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dryruse/dryruse.csv' }
|
114
|
-
|
115
|
-
store 'census_division_number', :field_name => 'DIVISION'
|
116
|
-
store 'census_division_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
|
117
|
-
store 'census_region_number', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_number', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
|
118
|
-
store 'census_region_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
|
119
|
-
|
120
|
-
store 'floorspace', :field_name => 'TOTSQFT'
|
121
|
-
store 'residents', :field_name => 'NHSLDMEM'
|
122
|
-
store 'ownership', :field_name => 'KOWNRENT'
|
123
|
-
store 'thermostat_programmability', :field_name => 'PROTHERM'
|
124
|
-
store 'refrigerator_count', :field_name => 'NUMFRIG'
|
125
|
-
store 'freezer_count', :field_name => 'NUMFREEZ'
|
126
|
-
store 'heating_degree_days', :field_name => 'HD65'
|
127
|
-
store 'cooling_degree_days', :field_name => 'CD65'
|
128
|
-
store 'annual_energy_from_fuel_oil_for_heating_space', :field_name => 'BTUFOSPH'
|
129
|
-
store 'annual_energy_from_fuel_oil_for_heating_water', :field_name => 'BTUFOWTH'
|
130
|
-
store 'annual_energy_from_fuel_oil_for_appliances', :field_name => 'BTUFOAPL'
|
131
|
-
store 'annual_energy_from_natural_gas_for_heating_space', :field_name => 'BTUNGSPH'
|
132
|
-
store 'annual_energy_from_natural_gas_for_heating_water', :field_name => 'BTUNGWTH'
|
133
|
-
store 'annual_energy_from_natural_gas_for_appliances', :field_name => 'BTUNGAPL'
|
134
|
-
store 'annual_energy_from_propane_for_heating_space', :field_name => 'BTULPSPH'
|
135
|
-
store 'annual_energy_from_propane_for_heating_water', :field_name => 'BTULPWTH'
|
136
|
-
store 'annual_energy_from_propane_for_appliances', :field_name => 'BTULPAPL'
|
137
|
-
store 'annual_energy_from_wood', :field_name => 'BTUWOOD'
|
138
|
-
store 'annual_energy_from_kerosene', :field_name => 'BTUKER'
|
139
|
-
store 'annual_energy_from_electricity_for_clothes_driers', :field_name => 'BTUELCDR'
|
140
|
-
store 'annual_energy_from_electricity_for_dishwashers', :field_name => 'BTUELDWH'
|
141
|
-
store 'annual_energy_from_electricity_for_freezers', :field_name => 'BTUELFZZ'
|
142
|
-
store 'annual_energy_from_electricity_for_refrigerators', :field_name => 'BTUELRFG'
|
143
|
-
store 'annual_energy_from_electricity_for_air_conditioners', :field_name => 'BTUELCOL'
|
144
|
-
store 'annual_energy_from_electricity_for_heating_space', :field_name => 'BTUELSPH'
|
145
|
-
store 'annual_energy_from_electricity_for_heating_water', :field_name => 'BTUELWTH'
|
146
|
-
store 'annual_energy_from_electricity_for_other_appliances', :field_name => 'BTUELAPL'
|
147
|
-
store 'weighting', :field_name => 'NWEIGHT'
|
148
|
-
store 'total_rooms', :field_name => 'TOTROOMS'
|
149
|
-
store 'bathrooms', :field_name => 'NCOMBATH'
|
150
|
-
store 'halfbaths', :field_name => 'NHAFBATH'
|
151
|
-
store 'heated_garage', :field_name => 'GARGHEAT'
|
152
|
-
store 'attached_1car_garage', :field_name => 'GARAGE1C'
|
153
|
-
store 'detached_1car_garage', :field_name => 'DGARG1C'
|
154
|
-
store 'attached_2car_garage', :field_name => 'GARAGE2C'
|
155
|
-
store 'detached_2car_garage', :field_name => 'DGARG2C'
|
156
|
-
store 'attached_3car_garage', :field_name => 'GARAGE3C'
|
157
|
-
store 'detached_3car_garage', :field_name => 'DGARG3C'
|
158
|
-
store 'lights_on_1_to_4_hours', :field_name => 'LGT1'
|
159
|
-
store 'efficient_lights_on_1_to_4_hours', :field_name => 'LGT1EE'
|
160
|
-
store 'lights_on_4_to_12_hours', :field_name => 'LGT4'
|
161
|
-
store 'efficient_lights_on_4_to_12_hours', :field_name => 'LGT4EE'
|
162
|
-
store 'lights_on_over_12_hours', :field_name => 'LGT12'
|
163
|
-
store 'efficient_lights_on_over_12_hours', :field_name => 'LGT12EE'
|
164
|
-
store 'outdoor_all_night_lights', :field_name => 'NOUTLGTNT'
|
165
|
-
store 'outdoor_all_night_gas_lights', :field_name => 'NGASLIGHT'
|
166
|
-
end
|
167
|
-
|
168
|
-
# Rather than nullify the continuous variables that EIA identifies as LEGITIMATE SKIPS, we convert them to zero
|
169
|
-
# This makes it easier to derive useful information like "how many rooms does the house have?"
|
170
|
-
process 'Zero out what the EIA calls "LEGITIMATE SKIPS"' do
|
171
|
-
%w{
|
172
|
-
annual_energy_from_electricity_for_air_conditioners
|
173
|
-
annual_energy_from_electricity_for_clothes_driers
|
174
|
-
annual_energy_from_electricity_for_dishwashers
|
175
|
-
annual_energy_from_electricity_for_freezers
|
176
|
-
annual_energy_from_electricity_for_heating_space
|
177
|
-
annual_energy_from_electricity_for_heating_water
|
178
|
-
annual_energy_from_electricity_for_other_appliances
|
179
|
-
annual_energy_from_electricity_for_refrigerators
|
180
|
-
annual_energy_from_fuel_oil_for_appliances
|
181
|
-
annual_energy_from_fuel_oil_for_heating_space
|
182
|
-
annual_energy_from_fuel_oil_for_heating_water
|
183
|
-
annual_energy_from_kerosene
|
184
|
-
annual_energy_from_propane_for_appliances
|
185
|
-
annual_energy_from_propane_for_heating_space
|
186
|
-
annual_energy_from_propane_for_heating_water
|
187
|
-
annual_energy_from_natural_gas_for_appliances
|
188
|
-
annual_energy_from_natural_gas_for_heating_space
|
189
|
-
annual_energy_from_natural_gas_for_heating_water
|
190
|
-
annual_energy_from_wood
|
191
|
-
lights_on_1_to_4_hours
|
192
|
-
lights_on_over_12_hours
|
193
|
-
efficient_lights_on_over_12_hours
|
194
|
-
efficient_lights_on_1_to_4_hours
|
195
|
-
lights_on_4_to_12_hours
|
196
|
-
efficient_lights_on_4_to_12_hours
|
197
|
-
outdoor_all_night_gas_lights
|
198
|
-
outdoor_all_night_lights
|
199
|
-
thermostat_programmability
|
200
|
-
detached_1car_garage
|
201
|
-
detached_2car_garage
|
202
|
-
detached_3car_garage
|
203
|
-
attached_1car_garage
|
204
|
-
attached_2car_garage
|
205
|
-
attached_3car_garage
|
206
|
-
heated_garage
|
207
|
-
}.each do |attr_name|
|
208
|
-
max = maximum attr_name, :select => "CONVERT(#{attr_name}, UNSIGNED INTEGER)"
|
209
|
-
# if the maximum value of a row is all 999's, then it's a LEGITIMATE SKIP and we should set it to zero
|
210
|
-
if /^9+$/.match(max.to_i.to_s)
|
211
|
-
update_all "#{attr_name} = 0", "#{attr_name} = #{max}"
|
212
|
-
end
|
213
|
-
end
|
214
|
-
end
|
215
|
-
|
216
|
-
process 'Convert units to metric after zeroing out LEGITIMATE SKIPS' do
|
217
|
-
[
|
218
|
-
[ 'floorspace', :square_feet, :square_metres ],
|
219
|
-
[ 'annual_energy_from_fuel_oil_for_heating_space', :kbtus, :joules ],
|
220
|
-
[ 'annual_energy_from_fuel_oil_for_heating_water', :kbtus, :joules ],
|
221
|
-
[ 'annual_energy_from_fuel_oil_for_appliances', :kbtus, :joules ],
|
222
|
-
[ 'annual_energy_from_natural_gas_for_heating_space', :kbtus, :joules ],
|
223
|
-
[ 'annual_energy_from_natural_gas_for_heating_water', :kbtus, :joules ],
|
224
|
-
[ 'annual_energy_from_natural_gas_for_appliances', :kbtus, :joules ],
|
225
|
-
[ 'annual_energy_from_propane_for_heating_space', :kbtus, :joules ],
|
226
|
-
[ 'annual_energy_from_propane_for_heating_water', :kbtus, :joules ],
|
227
|
-
[ 'annual_energy_from_propane_for_appliances', :kbtus, :joules ],
|
228
|
-
[ 'annual_energy_from_wood', :kbtus, :joules ],
|
229
|
-
[ 'annual_energy_from_kerosene', :kbtus, :joules ],
|
230
|
-
[ 'annual_energy_from_electricity_for_clothes_driers', :kbtus, :joules ],
|
231
|
-
[ 'annual_energy_from_electricity_for_dishwashers', :kbtus, :joules ],
|
232
|
-
[ 'annual_energy_from_electricity_for_freezers', :kbtus, :joules ],
|
233
|
-
[ 'annual_energy_from_electricity_for_refrigerators', :kbtus, :joules ],
|
234
|
-
[ 'annual_energy_from_electricity_for_air_conditioners', :kbtus, :joules ],
|
235
|
-
[ 'annual_energy_from_electricity_for_heating_space', :kbtus, :joules ],
|
236
|
-
[ 'annual_energy_from_electricity_for_heating_water', :kbtus, :joules ],
|
237
|
-
[ 'annual_energy_from_electricity_for_other_appliances', :kbtus, :joules ],
|
238
|
-
].each do |attr_name, from_units, to_units|
|
239
|
-
update_all "#{attr_name} = #{attr_name} * #{Conversions::Unit.exchange_rate from_units, to_units}"
|
240
|
-
end
|
241
|
-
end
|
242
|
-
|
243
|
-
process 'Add a new field "rooms" that estimates how many rooms are in the house' do
|
244
|
-
update_all 'rooms = total_rooms + bathrooms/2 + halfbaths/4 + heated_garage*(attached_1car_garage + detached_1car_garage + 2*(attached_2car_garage + detached_2car_garage) + 3*(attached_3car_garage + detached_3car_garage))'
|
245
|
-
end
|
246
|
-
|
247
|
-
process 'Add a new field "lighting_use" that estimates how many hours light bulbs are turned on in the house' do
|
248
|
-
update_all 'lighting_use = 2*(lights_on_1_to_4_hours + efficient_lights_on_1_to_4_hours) + 8*(lights_on_4_to_12_hours + efficient_lights_on_4_to_12_hours) + 16*(lights_on_over_12_hours + efficient_lights_on_over_12_hours) + 12*(outdoor_all_night_lights + outdoor_all_night_gas_lights)'
|
249
|
-
end
|
250
|
-
|
251
|
-
process 'Add a new field "lighting_efficiency" that estimates what percentage of light bulbs in a house are energy-efficient' do
|
252
|
-
update_all 'lighting_efficiency = (2*efficient_lights_on_1_to_4_hours + 8*efficient_lights_on_4_to_12_hours + 16*efficient_lights_on_over_12_hours) / lighting_use'
|
253
|
-
end
|
254
|
-
end
|
255
|
-
end
|
256
|
-
|
257
|
-
# T-100 Segment (All Carriers): http://www.transtats.bts.gov/Fields.asp?Table_ID=293
|
258
|
-
class T100FlightSegment < ActiveRecord::Base
|
259
|
-
set_primary_key :row_hash
|
260
|
-
URL = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
|
261
|
-
FORM_DATA = %{
|
262
|
-
UserTableName=T_100_Segment__All_Carriers&
|
263
|
-
DBShortName=Air_Carriers&
|
264
|
-
RawDataTable=T_T100_SEGMENT_ALL_CARRIER&
|
265
|
-
sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D__MONTH_NUMBER__+AND+YEAR%3D__YEAR__&
|
266
|
-
varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&
|
267
|
-
grouplist=&
|
268
|
-
suml=&
|
269
|
-
sumRegion=&
|
270
|
-
filter1=title%3D&
|
271
|
-
filter2=title%3D&
|
272
|
-
geo=All%A0&
|
273
|
-
time=__MONTH_NAME__&
|
274
|
-
timename=Month&
|
275
|
-
GEOGRAPHY=All&
|
276
|
-
XYEAR=__YEAR__&
|
277
|
-
FREQUENCY=__MONTH_NUMBER__&
|
278
|
-
AllVars=All&
|
279
|
-
VarName=DEPARTURES_SCHEDULED&
|
280
|
-
VarDesc=DepScheduled&
|
281
|
-
VarType=Num&
|
282
|
-
VarName=DEPARTURES_PERFORMED&
|
283
|
-
VarDesc=DepPerformed&
|
284
|
-
VarType=Num&
|
285
|
-
VarName=PAYLOAD&
|
286
|
-
VarDesc=Payload&
|
287
|
-
VarType=Num&
|
288
|
-
VarName=SEATS&
|
289
|
-
VarDesc=Seats&
|
290
|
-
VarType=Num&
|
291
|
-
VarName=PASSENGERS&
|
292
|
-
VarDesc=Passengers&
|
293
|
-
VarType=Num&
|
294
|
-
VarName=FREIGHT&
|
295
|
-
VarDesc=Freight&
|
296
|
-
VarType=Num&
|
297
|
-
VarName=MAIL&
|
298
|
-
VarDesc=Mail&
|
299
|
-
VarType=Num&
|
300
|
-
VarName=DISTANCE&
|
301
|
-
VarDesc=Distance&
|
302
|
-
VarType=Num&
|
303
|
-
VarName=RAMP_TO_RAMP&
|
304
|
-
VarDesc=RampToRamp&
|
305
|
-
VarType=Num&
|
306
|
-
VarName=AIR_TIME&
|
307
|
-
VarDesc=AirTime&
|
308
|
-
VarType=Num&
|
309
|
-
VarName=UNIQUE_CARRIER&
|
310
|
-
VarDesc=UniqueCarrier&
|
311
|
-
VarType=Char&
|
312
|
-
VarName=AIRLINE_ID&
|
313
|
-
VarDesc=AirlineID&
|
314
|
-
VarType=Num&
|
315
|
-
VarName=UNIQUE_CARRIER_NAME&
|
316
|
-
VarDesc=UniqueCarrierName&
|
317
|
-
VarType=Char&
|
318
|
-
VarName=UNIQUE_CARRIER_ENTITY&
|
319
|
-
VarDesc=UniqCarrierEntity&
|
320
|
-
VarType=Char&
|
321
|
-
VarName=REGION&
|
322
|
-
VarDesc=CarrierRegion&
|
323
|
-
VarType=Char&
|
324
|
-
VarName=CARRIER&
|
325
|
-
VarDesc=Carrier&
|
326
|
-
VarType=Char&
|
327
|
-
VarName=CARRIER_NAME&
|
328
|
-
VarDesc=CarrierName&
|
329
|
-
VarType=Char&
|
330
|
-
VarName=CARRIER_GROUP&
|
331
|
-
VarDesc=CarrierGroup&
|
332
|
-
VarType=Num&
|
333
|
-
VarName=CARRIER_GROUP_NEW&
|
334
|
-
VarDesc=CarrierGroupNew&
|
335
|
-
VarType=Num&
|
336
|
-
VarName=ORIGIN&
|
337
|
-
VarDesc=Origin&
|
338
|
-
VarType=Char&
|
339
|
-
VarName=ORIGIN_CITY_NAME&
|
340
|
-
VarDesc=OriginCityName&
|
341
|
-
VarType=Char&
|
342
|
-
VarName=ORIGIN_CITY_NUM&
|
343
|
-
VarDesc=OriginCityNum&
|
344
|
-
VarType=Num&
|
345
|
-
VarName=ORIGIN_STATE_ABR&
|
346
|
-
VarDesc=OriginState&
|
347
|
-
VarType=Char&
|
348
|
-
VarName=ORIGIN_STATE_FIPS&
|
349
|
-
VarDesc=OriginStateFips&
|
350
|
-
VarType=Char&
|
351
|
-
VarName=ORIGIN_STATE_NM&
|
352
|
-
VarDesc=OriginStateName&
|
353
|
-
VarType=Char&
|
354
|
-
VarName=ORIGIN_COUNTRY&
|
355
|
-
VarDesc=OriginCountry&
|
356
|
-
VarType=Char&
|
357
|
-
VarName=ORIGIN_COUNTRY_NAME&
|
358
|
-
VarDesc=OriginCountryName&
|
359
|
-
VarType=Char&
|
360
|
-
VarName=ORIGIN_WAC&
|
361
|
-
VarDesc=OriginWac&
|
362
|
-
VarType=Num&
|
363
|
-
VarName=DEST&
|
364
|
-
VarDesc=Dest&
|
365
|
-
VarType=Char&
|
366
|
-
VarName=DEST_CITY_NAME&
|
367
|
-
VarDesc=DestCityName&
|
368
|
-
VarType=Char&
|
369
|
-
VarName=DEST_CITY_NUM&
|
370
|
-
VarDesc=DestCityNum&
|
371
|
-
VarType=Num&
|
372
|
-
VarName=DEST_STATE_ABR&
|
373
|
-
VarDesc=DestState&
|
374
|
-
VarType=Char&
|
375
|
-
VarName=DEST_STATE_FIPS&
|
376
|
-
VarDesc=DestStateFips&
|
377
|
-
VarType=Char&
|
378
|
-
VarName=DEST_STATE_NM&
|
379
|
-
VarDesc=DestStateName&
|
380
|
-
VarType=Char&
|
381
|
-
VarName=DEST_COUNTRY&
|
382
|
-
VarDesc=DestCountry&
|
383
|
-
VarType=Char&
|
384
|
-
VarName=DEST_COUNTRY_NAME&
|
385
|
-
VarDesc=DestCountryName&
|
386
|
-
VarType=Char&
|
387
|
-
VarName=DEST_WAC&
|
388
|
-
VarDesc=DestWac&
|
389
|
-
VarType=Num&
|
390
|
-
VarName=AIRCRAFT_GROUP&
|
391
|
-
VarDesc=AircraftGroup&
|
392
|
-
VarType=Num&
|
393
|
-
VarName=AIRCRAFT_TYPE&
|
394
|
-
VarDesc=AircraftType&
|
395
|
-
VarType=Char&
|
396
|
-
VarName=AIRCRAFT_CONFIG&
|
397
|
-
VarDesc=AircraftConfig&
|
398
|
-
VarType=Num&
|
399
|
-
VarName=YEAR&
|
400
|
-
VarDesc=Year&
|
401
|
-
VarType=Num&
|
402
|
-
VarName=QUARTER&
|
403
|
-
VarDesc=Quarter&
|
404
|
-
VarType=Num&
|
405
|
-
VarName=MONTH&
|
406
|
-
VarDesc=Month&
|
407
|
-
VarType=Num&
|
408
|
-
VarName=DISTANCE_GROUP&
|
409
|
-
VarDesc=DistanceGroup&
|
410
|
-
VarType=Num&
|
411
|
-
VarName=CLASS&
|
412
|
-
VarDesc=Class&
|
413
|
-
VarType=Char&
|
414
|
-
VarName=DATA_SOURCE&
|
415
|
-
VarDesc=DataSource&
|
416
|
-
VarType=Char
|
417
|
-
}.gsub /[\s]+/,''
|
418
|
-
|
419
|
-
data_miner do
|
420
|
-
months = Hash.new
|
421
|
-
# (2008..2009).each do |year|
|
422
|
-
(2008..2008).each do |year|
|
423
|
-
# (1..12).each do |month|
|
424
|
-
(1..1).each do |month|
|
425
|
-
time = Time.gm year, month
|
426
|
-
form_data = FORM_DATA.dup
|
427
|
-
form_data.gsub! '__YEAR__', time.year.to_s
|
428
|
-
form_data.gsub! '__MONTH_NUMBER__', time.month.to_s
|
429
|
-
form_data.gsub! '__MONTH_NAME__', time.strftime('%B')
|
430
|
-
months[time] = form_data
|
431
|
-
end
|
432
|
-
end
|
433
|
-
months.each do |month, form_data|
|
434
|
-
import "T100 data from #{month.strftime('%B %Y')}",
|
435
|
-
:url => URL,
|
436
|
-
:form_data => form_data,
|
437
|
-
:compression => :zip,
|
438
|
-
:glob => '/*.csv' do
|
439
|
-
key 'row_hash'
|
440
|
-
store 'departures_scheduled', :field_name => 'DEPARTURES_SCHEDULED'
|
441
|
-
store 'departures_performed', :field_name => 'DEPARTURES_PERFORMED'
|
442
|
-
store 'payload', :field_name => 'PAYLOAD', :from_units => :pounds, :to_units => :kilograms
|
443
|
-
store 'seats', :field_name => 'SEATS'
|
444
|
-
store 'passengers', :field_name => 'PASSENGERS'
|
445
|
-
store 'freight', :field_name => 'FREIGHT', :from_units => :pounds, :to_units => :kilograms
|
446
|
-
store 'mail', :field_name => 'MAIL', :from_units => :pounds, :to_units => :kilograms
|
447
|
-
store 'distance', :field_name => 'DISTANCE', :from_units => :miles, :to_units => :kilometres
|
448
|
-
store 'ramp_to_ramp', :field_name => 'RAMP_TO_RAMP'
|
449
|
-
store 'air_time', :field_name => 'AIR_TIME'
|
450
|
-
store 'unique_carrier', :field_name => 'UNIQUE_CARRIER'
|
451
|
-
store 'dot_airline_id', :field_name => 'AIRLINE_ID'
|
452
|
-
store 'unique_carrier_name', :field_name => 'UNIQUE_CARRIER_NAME'
|
453
|
-
store 'unique_carrier_entity', :field_name => 'UNIQUE_CARRIER_ENTITY'
|
454
|
-
store 'region', :field_name => 'REGION'
|
455
|
-
store 'carrier', :field_name => 'CARRIER'
|
456
|
-
store 'carrier_name', :field_name => 'CARRIER_NAME'
|
457
|
-
store 'carrier_group', :field_name => 'CARRIER_GROUP'
|
458
|
-
store 'carrier_group_new', :field_name => 'CARRIER_GROUP_NEW'
|
459
|
-
store 'origin_airport_iata', :field_name => 'ORIGIN'
|
460
|
-
store 'origin_city_name', :field_name => 'ORIGIN_CITY_NAME'
|
461
|
-
store 'origin_city_num', :field_name => 'ORIGIN_CITY_NUM'
|
462
|
-
store 'origin_state_abr', :field_name => 'ORIGIN_STATE_ABR'
|
463
|
-
store 'origin_state_fips', :field_name => 'ORIGIN_STATE_FIPS'
|
464
|
-
store 'origin_state_nm', :field_name => 'ORIGIN_STATE_NM'
|
465
|
-
store 'origin_country_iso_3166', :field_name => 'ORIGIN_COUNTRY'
|
466
|
-
store 'origin_country_name', :field_name => 'ORIGIN_COUNTRY_NAME'
|
467
|
-
store 'origin_wac', :field_name => 'ORIGIN_WAC'
|
468
|
-
store 'dest_airport_iata', :field_name => 'DEST'
|
469
|
-
store 'dest_city_name', :field_name => 'DEST_CITY_NAME'
|
470
|
-
store 'dest_city_num', :field_name => 'DEST_CITY_NUM'
|
471
|
-
store 'dest_state_abr', :field_name => 'DEST_STATE_ABR'
|
472
|
-
store 'dest_state_fips', :field_name => 'DEST_STATE_FIPS'
|
473
|
-
store 'dest_state_nm', :field_name => 'DEST_STATE_NM'
|
474
|
-
store 'dest_country_iso_3166', :field_name => 'DEST_COUNTRY'
|
475
|
-
store 'dest_country_name', :field_name => 'DEST_COUNTRY_NAME'
|
476
|
-
store 'dest_wac', :field_name => 'DEST_WAC'
|
477
|
-
store 'bts_aircraft_group', :field_name => 'AIRCRAFT_GROUP'
|
478
|
-
store 'bts_aircraft_type', :field_name => 'AIRCRAFT_TYPE'
|
479
|
-
store 'bts_aircraft_config', :field_name => 'AIRCRAFT_CONFIG'
|
480
|
-
store 'year', :field_name => 'YEAR'
|
481
|
-
store 'quarter', :field_name => 'QUARTER'
|
482
|
-
store 'month', :field_name => 'MONTH'
|
483
|
-
store 'bts_distance_group', :field_name => 'DISTANCE_GROUP'
|
484
|
-
store 'bts_service_class', :field_name => 'CLASS'
|
485
|
-
store 'data_source', :field_name => 'DATA_SOURCE'
|
486
|
-
end
|
487
|
-
end
|
488
|
-
|
489
|
-
process 'Derive freight share as a fraction of payload' do
|
490
|
-
update_all 'freight_share = (freight + mail) / payload', 'payload > 0'
|
491
|
-
end
|
492
|
-
|
493
|
-
process 'Derive load factor, which is passengers divided by the total seats available' do
|
494
|
-
update_all 'load_factor = passengers / seats', 'passengers <= seats'
|
495
|
-
end
|
496
|
-
|
497
|
-
process 'Derive average seats per departure' do
|
498
|
-
update_all 'seats_per_departure = seats / departures_performed', 'departures_performed > 0'
|
499
|
-
end
|
500
|
-
end
|
501
|
-
end
|
502
|
-
|
503
|
-
# note that this depends on stuff in Aircraft
|
504
|
-
class AircraftDeux < ActiveRecord::Base
|
505
|
-
set_primary_key :icao_code
|
506
|
-
|
507
|
-
# defined on the class because we defined the errata with a shorthand
|
508
|
-
class << self
|
509
|
-
def is_not_attributed_to_aerospatiale?(row)
|
510
|
-
not row['Manufacturer'] =~ /AEROSPATIALE/i
|
511
|
-
end
|
512
|
-
|
513
|
-
def is_not_attributed_to_cessna?(row)
|
514
|
-
not row['Manufacturer'] =~ /CESSNA/i
|
515
|
-
end
|
516
|
-
|
517
|
-
def is_not_attributed_to_learjet?(row)
|
518
|
-
not row['Manufacturer'] =~ /LEAR/i
|
519
|
-
end
|
520
|
-
|
521
|
-
def is_not_attributed_to_dehavilland?(row)
|
522
|
-
not row['Manufacturer'] =~ /DE ?HAVILLAND/i
|
523
|
-
end
|
524
|
-
|
525
|
-
def is_not_attributed_to_mcdonnell_douglas?(row)
|
526
|
-
not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
|
527
|
-
end
|
528
|
-
|
529
|
-
def is_not_a_dc_plane?(row)
|
530
|
-
not row['Model'] =~ /DC/i
|
531
|
-
end
|
532
|
-
|
533
|
-
def is_a_crj_900?(row)
|
534
|
-
row['Designator'].downcase == 'crj9'
|
535
|
-
end
|
536
|
-
end
|
537
|
-
|
538
|
-
data_miner do
|
539
|
-
# ('A'..'Z').each do |letter|
|
540
|
-
# Note: for the purposes of testing, only importing "D"
|
541
|
-
%w{ D }.each do |letter|
|
542
|
-
import("ICAO codes starting with letter #{letter} used by the FAA",
|
543
|
-
:url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
|
544
|
-
:encoding => 'windows-1252',
|
545
|
-
:errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw' },
|
546
|
-
:row_xpath => '//table/tr[2]/td/table/tr',
|
547
|
-
:column_xpath => 'td') do
|
548
|
-
key 'icao_code', :field_name => 'Designator'
|
549
|
-
store 'bts_name', :matcher => Aircraft::BtsNameMatcher.new
|
550
|
-
store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher.new
|
551
|
-
store 'manufacturer_name', :field_name => 'Manufacturer'
|
552
|
-
store 'name', :field_name => 'Model'
|
553
|
-
end
|
554
|
-
end
|
555
|
-
end
|
556
|
-
end
|
557
|
-
|
558
|
-
class AutomobileMakeFleetYear < ActiveRecord::Base
|
559
|
-
set_primary_key :name
|
560
|
-
|
561
|
-
col :name
|
562
|
-
col :make_name
|
563
|
-
col :fleet
|
564
|
-
col :year, :type => :integer
|
565
|
-
col :fuel_efficiency, :type => :float
|
566
|
-
col :fuel_efficiency_units
|
567
|
-
col :volume, :type => :integer
|
568
|
-
col :make_year_name
|
569
|
-
col :created_at, :type => :datetime
|
570
|
-
col :updated_at, :type => :datetime
|
571
|
-
|
572
|
-
data_miner do
|
573
|
-
process :auto_upgrade!
|
574
|
-
|
575
|
-
process "finish if i tell you to" do
|
576
|
-
raise DataMiner::Finish if $force_finish
|
577
|
-
end
|
578
|
-
|
579
|
-
process "skip if i tell you to" do
|
580
|
-
raise DataMiner::Skip if $force_skip
|
581
|
-
end
|
582
|
-
|
583
|
-
# CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA
|
584
|
-
import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/make_fleet_years.csv',
|
585
|
-
:errata => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv' },
|
586
|
-
:select => lambda { |row| row['volume'].to_i > 0 } do
|
587
|
-
key 'name', :synthesize => lambda { |row| [ row['manufacturer_name'], row['fleet'][2,2], row['year_content'] ].join ' ' }
|
588
|
-
store 'make_name', :field_name => 'manufacturer_name'
|
589
|
-
store 'year', :field_name => 'year_content'
|
590
|
-
store 'fleet', :chars => 2..3 # zero-based
|
591
|
-
store 'fuel_efficiency', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
592
|
-
store 'volume'
|
593
|
-
end
|
594
|
-
end
|
595
|
-
end
|
596
|
-
|
597
|
-
class CensusDivisionTrois < ActiveRecord::Base
|
598
|
-
set_primary_key :number_code
|
599
|
-
|
600
|
-
col :number_code
|
601
|
-
col :name
|
602
|
-
col :census_region_name
|
603
|
-
col :census_region_number, :type => :integer
|
604
|
-
add_index 'census_region_name', :name => 'homefry'
|
605
|
-
add_index ['number_code', 'name', 'census_region_name', 'census_region_number']
|
606
|
-
|
607
|
-
data_miner do
|
608
|
-
process :auto_upgrade!
|
609
|
-
end
|
610
|
-
end
|
611
|
-
|
612
|
-
class CensusDivisionFour < ActiveRecord::Base
|
613
|
-
col :number_code
|
614
|
-
col :name
|
615
|
-
col :census_region_name
|
616
|
-
col :census_region_number, :type => :integer
|
617
|
-
add_index 'census_region_name', :name => 'homefry'
|
618
|
-
|
619
|
-
data_miner do
|
620
|
-
process :auto_upgrade!
|
621
|
-
end
|
622
|
-
end
|
623
|
-
|
624
|
-
# todo: have somebody properly organize these
|
625
|
-
class TestOldSyntax < Test::Unit::TestCase
|
626
|
-
if ENV['WIP']
|
627
|
-
context 'with nullify option' do
|
628
|
-
should 'treat blank fields as null values' do
|
629
|
-
Aircraft.delete_all
|
630
|
-
Aircraft.data_miner_runs.delete_all
|
631
|
-
Aircraft.run_data_miner!
|
632
|
-
assert_greater_than 0, Aircraft.count
|
633
|
-
assert_false Aircraft.where(:brighter_planet_aircraft_class_code => nil).empty?
|
634
|
-
end
|
635
|
-
end
|
636
|
-
end
|
637
|
-
|
638
|
-
if ENV['ALL'] == 'true'
|
639
|
-
should 'directly create a table for the model' do
|
640
|
-
if AutomobileMakeFleetYear.table_exists?
|
641
|
-
ActiveRecord::Base.connection.execute 'DROP TABLE automobile_make_fleet_years;'
|
642
|
-
end
|
643
|
-
AutomobileMakeFleetYear.auto_upgrade!
|
644
|
-
assert AutomobileMakeFleetYear.table_exists?
|
645
|
-
end
|
646
|
-
end
|
647
|
-
|
648
|
-
if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
|
649
|
-
should 'append to an existing config' do
|
650
|
-
AutomobileFuelType.class_eval do
|
651
|
-
data_miner :append => true do
|
652
|
-
import 'example1', :url => 'http://example1.com' do
|
653
|
-
key 'code'
|
654
|
-
store 'name'
|
655
|
-
end
|
656
|
-
end
|
657
|
-
data_miner :append => true do
|
658
|
-
import 'example2', :url => 'http://example2.com' do
|
659
|
-
key 'code'
|
660
|
-
store 'name'
|
661
|
-
end
|
662
|
-
end
|
663
|
-
end
|
664
|
-
assert_equal 'http://example1.com', AutomobileFuelType.data_miner_config.steps[-2].table.url
|
665
|
-
assert_equal 'http://example2.com', AutomobileFuelType.data_miner_config.steps[-1].table.url
|
666
|
-
end
|
667
|
-
|
668
|
-
should 'override an existing data_miner configuration' do
|
669
|
-
AutomobileFuelType.class_eval do
|
670
|
-
data_miner do
|
671
|
-
import 'example', :url => 'http://example.com' do
|
672
|
-
key 'code'
|
673
|
-
store 'name'
|
674
|
-
end
|
675
|
-
end
|
676
|
-
end
|
677
|
-
assert_kind_of DataMiner::Import, AutomobileFuelType.data_miner_config.steps.first
|
678
|
-
assert_equal 'http://example.com', AutomobileFuelType.data_miner_config.steps.first.table.url
|
679
|
-
end
|
680
|
-
should "stop and finish if it gets a DataMiner::Finish" do
|
681
|
-
AutomobileMakeFleetYear.delete_all
|
682
|
-
AutomobileMakeFleetYear.data_miner_runs.delete_all
|
683
|
-
$force_finish = true
|
684
|
-
AutomobileMakeFleetYear.run_data_miner!
|
685
|
-
assert_equal 0, AutomobileMakeFleetYear.count
|
686
|
-
assert (AutomobileMakeFleetYear.data_miner_runs.count > 0)
|
687
|
-
assert AutomobileMakeFleetYear.data_miner_runs.all? { |run| run.finished? and not run.skipped and not run.killed? }
|
688
|
-
$force_finish = false
|
689
|
-
AutomobileMakeFleetYear.run_data_miner!
|
690
|
-
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
691
|
-
end
|
692
|
-
|
693
|
-
should "stop and register skipped if it gets a DataMiner::Skip" do
|
694
|
-
AutomobileMakeFleetYear.delete_all
|
695
|
-
AutomobileMakeFleetYear.data_miner_runs.delete_all
|
696
|
-
$force_skip = true
|
697
|
-
AutomobileMakeFleetYear.run_data_miner!
|
698
|
-
assert_equal 0, AutomobileMakeFleetYear.count
|
699
|
-
assert (AutomobileMakeFleetYear.data_miner_runs.count > 0)
|
700
|
-
assert AutomobileMakeFleetYear.data_miner_runs.all? { |run| run.skipped? and not run.finished? and not run.killed? }
|
701
|
-
$force_skip = false
|
702
|
-
AutomobileMakeFleetYear.run_data_miner!
|
703
|
-
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
704
|
-
end
|
705
|
-
|
706
|
-
should "allow specifying dictionaries explicitly" do
|
707
|
-
CensusDivisionDeux.run_data_miner!
|
708
|
-
assert_equal 'South Region', CensusDivisionDeux.find(5).census_region_name
|
709
|
-
end
|
710
|
-
|
711
|
-
should "be able to key on things other than the primary key" do
|
712
|
-
Aircraft.run_data_miner!
|
713
|
-
assert_equal 'SP', Aircraft.find('DHC6').brighter_planet_aircraft_class_code
|
714
|
-
end
|
715
|
-
|
716
|
-
should "be able to synthesize rows without using a full parser class" do
|
717
|
-
AutomobileMakeFleetYear.run_data_miner!
|
718
|
-
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
719
|
-
end
|
720
|
-
|
721
|
-
should "keep a call stack so that you can call run_data_miner! on a child" do
|
722
|
-
CrosscallingCensusDivision.run_data_miner!
|
723
|
-
assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
|
724
|
-
assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
|
725
|
-
end
|
726
|
-
|
727
|
-
should "keep a call stack so that you can call run_data_miner! on a parent" do
|
728
|
-
CrosscallingCensusRegion.run_data_miner!
|
729
|
-
assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
|
730
|
-
assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
|
731
|
-
end
|
732
|
-
|
733
|
-
should "import airports" do
|
734
|
-
Airport.run_data_miner!
|
735
|
-
assert Airport.count > 0
|
736
|
-
end
|
737
|
-
|
738
|
-
should "pull in census divisions using a data.brighterplanet.com dictionary" do
|
739
|
-
CensusDivision.run_data_miner!
|
740
|
-
assert CensusDivision.count > 0
|
741
|
-
end
|
742
|
-
|
743
|
-
should "have a way to queue up runs that works with delated_job's send_later" do
|
744
|
-
assert AutomobileVariant.respond_to?(:run_data_miner!)
|
745
|
-
end
|
746
|
-
|
747
|
-
should "be idempotent" do
|
748
|
-
Country.data_miner_config.run
|
749
|
-
a = Country.count
|
750
|
-
Country.data_miner_config.run
|
751
|
-
b = Country.count
|
752
|
-
assert_equal a, b
|
753
|
-
|
754
|
-
CensusRegion.data_miner_config.run
|
755
|
-
a = CensusRegion.count
|
756
|
-
CensusRegion.data_miner_config.run
|
757
|
-
b = CensusRegion.count
|
758
|
-
assert_equal a, b
|
759
|
-
end
|
760
|
-
|
761
|
-
should "hash things" do
|
762
|
-
AutomobileVariant.data_miner_config.steps[0].run
|
763
|
-
assert AutomobileVariant.first.row_hash.present?
|
764
|
-
end
|
765
|
-
|
766
|
-
should "process a callback block instead of a method" do
|
767
|
-
AutomobileVariant.delete_all
|
768
|
-
AutomobileVariant.data_miner_config.steps[0].run
|
769
|
-
assert !AutomobileVariant.first.fuel_efficiency_city.present?
|
770
|
-
AutomobileVariant.data_miner_config.steps.last.run
|
771
|
-
assert AutomobileVariant.first.fuel_efficiency_city.present?
|
772
|
-
end
|
773
|
-
|
774
|
-
should "keep a log when it does a run" do
|
775
|
-
approx_started_at = Time.now
|
776
|
-
DataMiner.run :resource_names => %w{ Country }
|
777
|
-
approx_terminated_at = Time.now
|
778
|
-
last_run = DataMiner::Run.first(:conditions => { :resource_name => 'Country' }, :order => 'id DESC')
|
779
|
-
assert (last_run.started_at - approx_started_at).abs < 5 # seconds
|
780
|
-
assert (last_run.terminated_at - approx_terminated_at).abs < 5 # seconds
|
781
|
-
end
|
782
|
-
|
783
|
-
should "request a re-import from scratch" do
|
784
|
-
c = Country.new
|
785
|
-
c.iso_3166 = 'JUNK'
|
786
|
-
c.save!
|
787
|
-
assert Country.exists?(:iso_3166 => 'JUNK')
|
788
|
-
DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
|
789
|
-
assert !Country.exists?(:iso_3166 => 'JUNK')
|
790
|
-
end
|
791
|
-
|
792
|
-
should "know what runs were on a resource" do
|
793
|
-
DataMiner.run :resource_names => %w{ Country }
|
794
|
-
DataMiner.run :resource_names => %w{ Country }
|
795
|
-
assert Country.data_miner_runs.count > 0
|
796
|
-
end
|
797
|
-
end
|
798
|
-
|
799
|
-
if ENV['ALL'] == 'true' or ENV['SLOW'] == 'true'
|
800
|
-
should "allow errata to be specified with a shorthand, assuming the responder is the resource class itself" do
|
801
|
-
AircraftDeux.run_data_miner!
|
802
|
-
assert AircraftDeux.exists? :icao_code => 'DC91', :bts_aircraft_type_code => '630'
|
803
|
-
end
|
804
|
-
|
805
|
-
should "mine aircraft" do
|
806
|
-
Aircraft.run_data_miner!
|
807
|
-
assert Aircraft.exists? :icao_code => 'DC91', :bts_aircraft_type_code => '630'
|
808
|
-
end
|
809
|
-
|
810
|
-
should "mine automobile variants" do
|
811
|
-
AutomobileVariant.run_data_miner!
|
812
|
-
assert AutomobileVariant.count('make_name LIKE "%tesla"') > 0
|
813
|
-
end
|
814
|
-
|
815
|
-
should "mine T100 flight segments" do
|
816
|
-
T100FlightSegment.run_data_miner!
|
817
|
-
assert T100FlightSegment.count('dest_country_name LIKE "%United States"') > 0
|
818
|
-
end
|
819
|
-
|
820
|
-
should "mine residence survey responses" do
|
821
|
-
ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
|
822
|
-
assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.start_with?('Single-family detached house')
|
823
|
-
end
|
824
|
-
end
|
825
|
-
end
|