data_miner 1.3.8 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +42 -0
- data/Gemfile +19 -3
- data/README.rdoc +3 -3
- data/Rakefile +13 -15
- data/data_miner.gemspec +4 -15
- data/lib/data_miner.rb +69 -70
- data/lib/data_miner/active_record_extensions.rb +17 -22
- data/lib/data_miner/attribute.rb +176 -179
- data/lib/data_miner/dictionary.rb +38 -31
- data/lib/data_miner/run.rb +49 -18
- data/lib/data_miner/script.rb +116 -0
- data/lib/data_miner/step.rb +5 -0
- data/lib/data_miner/step/import.rb +74 -0
- data/lib/data_miner/step/process.rb +34 -0
- data/lib/data_miner/step/tap.rb +134 -0
- data/lib/data_miner/version.rb +1 -1
- data/test/helper.rb +26 -24
- data/test/support/breeds.xls +0 -0
- data/test/support/pet_color_dictionary.en.csv +5 -0
- data/test/support/pet_color_dictionary.es.csv +5 -0
- data/test/support/pets.csv +5 -0
- data/test/support/pets_funny.csv +4 -0
- data/test/test_data_miner.rb +103 -0
- data/test/test_earth_import.rb +25 -0
- data/test/test_earth_tap.rb +25 -0
- data/test/test_safety.rb +43 -0
- metadata +72 -78
- data/.document +0 -5
- data/lib/data_miner/config.rb +0 -124
- data/lib/data_miner/import.rb +0 -93
- data/lib/data_miner/process.rb +0 -38
- data/lib/data_miner/tap.rb +0 -143
- data/test/support/aircraft.rb +0 -102
- data/test/support/airport.rb +0 -16
- data/test/support/automobile_fuel_type.rb +0 -40
- data/test/support/automobile_variant.rb +0 -362
- data/test/support/country.rb +0 -15
- data/test/support/test_database.rb +0 -311
- data/test/test_data_miner_attribute.rb +0 -111
- data/test/test_data_miner_process.rb +0 -18
- data/test/test_old_syntax.rb +0 -825
- data/test/test_tap.rb +0 -21
@@ -1,111 +0,0 @@
|
|
1
|
-
$:.push File.dirname(__FILE__)
|
2
|
-
require 'helper'
|
3
|
-
|
4
|
-
TestDatabase.load_models
|
5
|
-
|
6
|
-
class TestDataMinerAttribute < Test::Unit::TestCase
|
7
|
-
context '#value_from_row' do
|
8
|
-
setup do
|
9
|
-
@airport = Airport.new
|
10
|
-
end
|
11
|
-
context 'nullify is true' do
|
12
|
-
setup do
|
13
|
-
@attribute = DataMiner::Attribute.new @airport, 'latitude', :nullify => true
|
14
|
-
end
|
15
|
-
should 'return nil if field is blank' do
|
16
|
-
assert_nil @attribute.value_from_row(
|
17
|
-
'name' => 'DTW',
|
18
|
-
'city' => 'Warren',
|
19
|
-
'country_name' => 'US',
|
20
|
-
'latitude' => '',
|
21
|
-
'longitude' => ''
|
22
|
-
)
|
23
|
-
end
|
24
|
-
should 'return the value if field is not blank' do
|
25
|
-
assert_equal '12.34', @attribute.value_from_row(
|
26
|
-
'name' => 'DTW',
|
27
|
-
'city' => 'Warren',
|
28
|
-
'country_name' => 'US',
|
29
|
-
'latitude' => '12.34',
|
30
|
-
'longitude' => ''
|
31
|
-
)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
context 'nullify is false' do
|
35
|
-
setup do
|
36
|
-
@attribute = DataMiner::Attribute.new @airport, 'latitude'
|
37
|
-
end
|
38
|
-
should 'return the value if field is not blank' do
|
39
|
-
assert_equal '12.34', @attribute.value_from_row(
|
40
|
-
'name' => 'DTW',
|
41
|
-
'city' => 'Warren',
|
42
|
-
'country_name' => 'US',
|
43
|
-
'latitude' => '12.34',
|
44
|
-
'longitude' => ''
|
45
|
-
)
|
46
|
-
end
|
47
|
-
should 'return blank if field is blank' do
|
48
|
-
assert_equal '', @attribute.value_from_row(
|
49
|
-
'name' => 'DTW',
|
50
|
-
'city' => 'Warren',
|
51
|
-
'country_name' => 'US',
|
52
|
-
'latitude' => '',
|
53
|
-
'longitude' => ''
|
54
|
-
)
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
context '#set_record_from_row' do
|
60
|
-
setup do
|
61
|
-
@automobile_fuel_type = AutomobileFuelType.new
|
62
|
-
end
|
63
|
-
context 'nullify is true, wants units' do
|
64
|
-
setup do
|
65
|
-
@attribute = DataMiner::Attribute.new @automobile_fuel_type, 'annual_distance', :nullify => true, :units_field_name => 'annual_distance_units'
|
66
|
-
end
|
67
|
-
should 'set value and units to nil if field is blank' do
|
68
|
-
@attribute.set_record_from_row(@automobile_fuel_type,
|
69
|
-
'name' => 'electricity',
|
70
|
-
'annual_distance' => '',
|
71
|
-
'annual_distance_units' => ''
|
72
|
-
)
|
73
|
-
assert_nil @automobile_fuel_type.annual_distance
|
74
|
-
assert_nil @automobile_fuel_type.annual_distance_units
|
75
|
-
end
|
76
|
-
should 'set value and units if field is not blank' do
|
77
|
-
@attribute.set_record_from_row(@automobile_fuel_type,
|
78
|
-
'name' => 'electricity',
|
79
|
-
'annual_distance' => '100.0',
|
80
|
-
'annual_distance_units' => 'kilometres'
|
81
|
-
)
|
82
|
-
assert_equal 100.0, @automobile_fuel_type.annual_distance
|
83
|
-
assert_equal 'kilometres', @automobile_fuel_type.annual_distance_units
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
context 'nullify is false, wants units' do
|
88
|
-
setup do
|
89
|
-
@attribute = DataMiner::Attribute.new @automobile_fuel_type, 'annual_distance', :units_field_name => 'annual_distance_units'
|
90
|
-
end
|
91
|
-
should 'set value and units to blank if field is blank' do
|
92
|
-
@attribute.set_record_from_row(@automobile_fuel_type,
|
93
|
-
'name' => 'electricity',
|
94
|
-
'annual_distance' => '',
|
95
|
-
'annual_distance_units' => ''
|
96
|
-
)
|
97
|
-
assert_equal 0.0, @automobile_fuel_type.annual_distance
|
98
|
-
assert_equal '', @automobile_fuel_type.annual_distance_units
|
99
|
-
end
|
100
|
-
should 'set value and units if field is not blank' do
|
101
|
-
@attribute.set_record_from_row(@automobile_fuel_type,
|
102
|
-
'name' => 'electricity',
|
103
|
-
'annual_distance' => '100.0',
|
104
|
-
'annual_distance_units' => 'kilometres'
|
105
|
-
)
|
106
|
-
assert_equal 100.0, @automobile_fuel_type.annual_distance
|
107
|
-
assert_equal 'kilometres', @automobile_fuel_type.annual_distance_units
|
108
|
-
end
|
109
|
-
end
|
110
|
-
end
|
111
|
-
end
|
@@ -1,18 +0,0 @@
|
|
1
|
-
$:.push File.dirname(__FILE__)
|
2
|
-
require 'helper'
|
3
|
-
|
4
|
-
class TestDataMinerProcess < Test::Unit::TestCase
|
5
|
-
context '#inspect' do
|
6
|
-
setup do
|
7
|
-
@config = DataMiner::Config.new 'foo'
|
8
|
-
end
|
9
|
-
should 'describe a block' do
|
10
|
-
process = DataMiner::Process.new(@config, 'something cool') { }
|
11
|
-
assert_match /something cool/, process.inspect
|
12
|
-
end
|
13
|
-
should 'describe a method' do
|
14
|
-
process = DataMiner::Process.new @config, :something_cool
|
15
|
-
assert_match /something_cool/, process.inspect
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
data/test/test_old_syntax.rb
DELETED
@@ -1,825 +0,0 @@
|
|
1
|
-
$:.push File.dirname(__FILE__)
|
2
|
-
require 'helper'
|
3
|
-
|
4
|
-
TestDatabase.load_models
|
5
|
-
|
6
|
-
class CensusRegion < ActiveRecord::Base
|
7
|
-
set_primary_key :number
|
8
|
-
|
9
|
-
data_miner do
|
10
|
-
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
|
11
|
-
key 'number', :field_name => 'Region'
|
12
|
-
store 'name', :field_name => 'Name'
|
13
|
-
end
|
14
|
-
|
15
|
-
# pretend this is a different data source
|
16
|
-
# fake! just for testing purposes
|
17
|
-
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
|
18
|
-
key 'number', :field_name => 'Region'
|
19
|
-
store 'name', :field_name => 'Name'
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
# smaller than a region
|
25
|
-
class CensusDivision < ActiveRecord::Base
|
26
|
-
set_primary_key :number
|
27
|
-
|
28
|
-
data_miner do
|
29
|
-
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
30
|
-
key 'number', :field_name => 'Division'
|
31
|
-
store 'name', :field_name => 'Name'
|
32
|
-
store 'census_region_number', :field_name => 'Region'
|
33
|
-
store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
class CensusDivisionDeux < ActiveRecord::Base
|
39
|
-
set_primary_key :number
|
40
|
-
|
41
|
-
data_miner do
|
42
|
-
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
43
|
-
key 'number', :field_name => 'Division'
|
44
|
-
store 'name', :field_name => 'Name'
|
45
|
-
store 'census_region_number', :field_name => 'Region'
|
46
|
-
store 'census_region_name', :field_name => 'Region', :dictionary => DataMiner::Dictionary.new(:input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv')
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
class CrosscallingCensusRegion < ActiveRecord::Base
|
52
|
-
set_primary_key :number
|
53
|
-
|
54
|
-
has_many :crosscalling_census_divisions
|
55
|
-
|
56
|
-
data_miner do
|
57
|
-
process "derive ourselves from the census divisions table (i.e., cross call census divisions)" do
|
58
|
-
CrosscallingCensusDivision.run_data_miner!
|
59
|
-
connection.create_table :crosscalling_census_regions, :options => 'ENGINE=InnoDB default charset=utf8', :id => false, :force => true do |t|
|
60
|
-
t.column :number, :integer
|
61
|
-
t.column :name, :string
|
62
|
-
end
|
63
|
-
connection.execute 'ALTER TABLE crosscalling_census_regions ADD PRIMARY KEY (number);'
|
64
|
-
connection.execute %{
|
65
|
-
INSERT IGNORE INTO crosscalling_census_regions(number, name)
|
66
|
-
SELECT crosscalling_census_divisions.census_region_number, crosscalling_census_divisions.census_region_name FROM crosscalling_census_divisions
|
67
|
-
}
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
class CrosscallingCensusDivision < ActiveRecord::Base
|
73
|
-
set_primary_key :number
|
74
|
-
|
75
|
-
belongs_to :crosscalling_census_regions, :foreign_key => 'census_region_number'
|
76
|
-
|
77
|
-
data_miner do
|
78
|
-
import "get a list of census divisions and their regions", :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
79
|
-
key 'number', :field_name => 'Division'
|
80
|
-
store 'name', :field_name => 'Name'
|
81
|
-
store 'census_region_number', :field_name => 'Region'
|
82
|
-
store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
|
83
|
-
end
|
84
|
-
|
85
|
-
process "make sure my parent object is set up (i.e., cross-call it)" do
|
86
|
-
CrosscallingCensusRegion.run_data_miner!
|
87
|
-
end
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
92
|
-
set_primary_key :department_of_energy_identifier
|
93
|
-
|
94
|
-
data_miner do
|
95
|
-
process 'Define some unit conversions' do
|
96
|
-
Conversions.register :kbtus, :joules, 1_000.0 * 1_055.05585
|
97
|
-
Conversions.register :square_feet, :square_metres, 0.09290304
|
98
|
-
end
|
99
|
-
|
100
|
-
# conversions are NOT performed here, since we first have to zero out legitimate skips
|
101
|
-
# otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
|
102
|
-
import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv' do
|
103
|
-
key 'department_of_energy_identifier', :field_name => 'DOEID'
|
104
|
-
|
105
|
-
store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
|
106
|
-
store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
|
107
|
-
store 'construction_period', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
|
108
|
-
store 'urbanity', :field_name => 'URBRUR', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/urbrur/urbrur.csv' }
|
109
|
-
store 'dishwasher_use', :field_name => 'DWASHUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dwashuse/dwashuse.csv' }
|
110
|
-
store 'central_ac_use', :field_name => 'USECENAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usecenac/usecenac.csv' }
|
111
|
-
store 'window_ac_use', :field_name => 'USEWWAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usewwac/usewwac.csv' }
|
112
|
-
store 'clothes_washer_use', :field_name => 'WASHLOAD', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/washload/washload.csv' }
|
113
|
-
store 'clothes_dryer_use', :field_name => 'DRYRUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dryruse/dryruse.csv' }
|
114
|
-
|
115
|
-
store 'census_division_number', :field_name => 'DIVISION'
|
116
|
-
store 'census_division_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
|
117
|
-
store 'census_region_number', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_number', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
|
118
|
-
store 'census_region_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
|
119
|
-
|
120
|
-
store 'floorspace', :field_name => 'TOTSQFT'
|
121
|
-
store 'residents', :field_name => 'NHSLDMEM'
|
122
|
-
store 'ownership', :field_name => 'KOWNRENT'
|
123
|
-
store 'thermostat_programmability', :field_name => 'PROTHERM'
|
124
|
-
store 'refrigerator_count', :field_name => 'NUMFRIG'
|
125
|
-
store 'freezer_count', :field_name => 'NUMFREEZ'
|
126
|
-
store 'heating_degree_days', :field_name => 'HD65'
|
127
|
-
store 'cooling_degree_days', :field_name => 'CD65'
|
128
|
-
store 'annual_energy_from_fuel_oil_for_heating_space', :field_name => 'BTUFOSPH'
|
129
|
-
store 'annual_energy_from_fuel_oil_for_heating_water', :field_name => 'BTUFOWTH'
|
130
|
-
store 'annual_energy_from_fuel_oil_for_appliances', :field_name => 'BTUFOAPL'
|
131
|
-
store 'annual_energy_from_natural_gas_for_heating_space', :field_name => 'BTUNGSPH'
|
132
|
-
store 'annual_energy_from_natural_gas_for_heating_water', :field_name => 'BTUNGWTH'
|
133
|
-
store 'annual_energy_from_natural_gas_for_appliances', :field_name => 'BTUNGAPL'
|
134
|
-
store 'annual_energy_from_propane_for_heating_space', :field_name => 'BTULPSPH'
|
135
|
-
store 'annual_energy_from_propane_for_heating_water', :field_name => 'BTULPWTH'
|
136
|
-
store 'annual_energy_from_propane_for_appliances', :field_name => 'BTULPAPL'
|
137
|
-
store 'annual_energy_from_wood', :field_name => 'BTUWOOD'
|
138
|
-
store 'annual_energy_from_kerosene', :field_name => 'BTUKER'
|
139
|
-
store 'annual_energy_from_electricity_for_clothes_driers', :field_name => 'BTUELCDR'
|
140
|
-
store 'annual_energy_from_electricity_for_dishwashers', :field_name => 'BTUELDWH'
|
141
|
-
store 'annual_energy_from_electricity_for_freezers', :field_name => 'BTUELFZZ'
|
142
|
-
store 'annual_energy_from_electricity_for_refrigerators', :field_name => 'BTUELRFG'
|
143
|
-
store 'annual_energy_from_electricity_for_air_conditioners', :field_name => 'BTUELCOL'
|
144
|
-
store 'annual_energy_from_electricity_for_heating_space', :field_name => 'BTUELSPH'
|
145
|
-
store 'annual_energy_from_electricity_for_heating_water', :field_name => 'BTUELWTH'
|
146
|
-
store 'annual_energy_from_electricity_for_other_appliances', :field_name => 'BTUELAPL'
|
147
|
-
store 'weighting', :field_name => 'NWEIGHT'
|
148
|
-
store 'total_rooms', :field_name => 'TOTROOMS'
|
149
|
-
store 'bathrooms', :field_name => 'NCOMBATH'
|
150
|
-
store 'halfbaths', :field_name => 'NHAFBATH'
|
151
|
-
store 'heated_garage', :field_name => 'GARGHEAT'
|
152
|
-
store 'attached_1car_garage', :field_name => 'GARAGE1C'
|
153
|
-
store 'detached_1car_garage', :field_name => 'DGARG1C'
|
154
|
-
store 'attached_2car_garage', :field_name => 'GARAGE2C'
|
155
|
-
store 'detached_2car_garage', :field_name => 'DGARG2C'
|
156
|
-
store 'attached_3car_garage', :field_name => 'GARAGE3C'
|
157
|
-
store 'detached_3car_garage', :field_name => 'DGARG3C'
|
158
|
-
store 'lights_on_1_to_4_hours', :field_name => 'LGT1'
|
159
|
-
store 'efficient_lights_on_1_to_4_hours', :field_name => 'LGT1EE'
|
160
|
-
store 'lights_on_4_to_12_hours', :field_name => 'LGT4'
|
161
|
-
store 'efficient_lights_on_4_to_12_hours', :field_name => 'LGT4EE'
|
162
|
-
store 'lights_on_over_12_hours', :field_name => 'LGT12'
|
163
|
-
store 'efficient_lights_on_over_12_hours', :field_name => 'LGT12EE'
|
164
|
-
store 'outdoor_all_night_lights', :field_name => 'NOUTLGTNT'
|
165
|
-
store 'outdoor_all_night_gas_lights', :field_name => 'NGASLIGHT'
|
166
|
-
end
|
167
|
-
|
168
|
-
# Rather than nullify the continuous variables that EIA identifies as LEGITIMATE SKIPS, we convert them to zero
|
169
|
-
# This makes it easier to derive useful information like "how many rooms does the house have?"
|
170
|
-
process 'Zero out what the EIA calls "LEGITIMATE SKIPS"' do
|
171
|
-
%w{
|
172
|
-
annual_energy_from_electricity_for_air_conditioners
|
173
|
-
annual_energy_from_electricity_for_clothes_driers
|
174
|
-
annual_energy_from_electricity_for_dishwashers
|
175
|
-
annual_energy_from_electricity_for_freezers
|
176
|
-
annual_energy_from_electricity_for_heating_space
|
177
|
-
annual_energy_from_electricity_for_heating_water
|
178
|
-
annual_energy_from_electricity_for_other_appliances
|
179
|
-
annual_energy_from_electricity_for_refrigerators
|
180
|
-
annual_energy_from_fuel_oil_for_appliances
|
181
|
-
annual_energy_from_fuel_oil_for_heating_space
|
182
|
-
annual_energy_from_fuel_oil_for_heating_water
|
183
|
-
annual_energy_from_kerosene
|
184
|
-
annual_energy_from_propane_for_appliances
|
185
|
-
annual_energy_from_propane_for_heating_space
|
186
|
-
annual_energy_from_propane_for_heating_water
|
187
|
-
annual_energy_from_natural_gas_for_appliances
|
188
|
-
annual_energy_from_natural_gas_for_heating_space
|
189
|
-
annual_energy_from_natural_gas_for_heating_water
|
190
|
-
annual_energy_from_wood
|
191
|
-
lights_on_1_to_4_hours
|
192
|
-
lights_on_over_12_hours
|
193
|
-
efficient_lights_on_over_12_hours
|
194
|
-
efficient_lights_on_1_to_4_hours
|
195
|
-
lights_on_4_to_12_hours
|
196
|
-
efficient_lights_on_4_to_12_hours
|
197
|
-
outdoor_all_night_gas_lights
|
198
|
-
outdoor_all_night_lights
|
199
|
-
thermostat_programmability
|
200
|
-
detached_1car_garage
|
201
|
-
detached_2car_garage
|
202
|
-
detached_3car_garage
|
203
|
-
attached_1car_garage
|
204
|
-
attached_2car_garage
|
205
|
-
attached_3car_garage
|
206
|
-
heated_garage
|
207
|
-
}.each do |attr_name|
|
208
|
-
max = maximum attr_name, :select => "CONVERT(#{attr_name}, UNSIGNED INTEGER)"
|
209
|
-
# if the maximum value of a row is all 999's, then it's a LEGITIMATE SKIP and we should set it to zero
|
210
|
-
if /^9+$/.match(max.to_i.to_s)
|
211
|
-
update_all "#{attr_name} = 0", "#{attr_name} = #{max}"
|
212
|
-
end
|
213
|
-
end
|
214
|
-
end
|
215
|
-
|
216
|
-
process 'Convert units to metric after zeroing out LEGITIMATE SKIPS' do
|
217
|
-
[
|
218
|
-
[ 'floorspace', :square_feet, :square_metres ],
|
219
|
-
[ 'annual_energy_from_fuel_oil_for_heating_space', :kbtus, :joules ],
|
220
|
-
[ 'annual_energy_from_fuel_oil_for_heating_water', :kbtus, :joules ],
|
221
|
-
[ 'annual_energy_from_fuel_oil_for_appliances', :kbtus, :joules ],
|
222
|
-
[ 'annual_energy_from_natural_gas_for_heating_space', :kbtus, :joules ],
|
223
|
-
[ 'annual_energy_from_natural_gas_for_heating_water', :kbtus, :joules ],
|
224
|
-
[ 'annual_energy_from_natural_gas_for_appliances', :kbtus, :joules ],
|
225
|
-
[ 'annual_energy_from_propane_for_heating_space', :kbtus, :joules ],
|
226
|
-
[ 'annual_energy_from_propane_for_heating_water', :kbtus, :joules ],
|
227
|
-
[ 'annual_energy_from_propane_for_appliances', :kbtus, :joules ],
|
228
|
-
[ 'annual_energy_from_wood', :kbtus, :joules ],
|
229
|
-
[ 'annual_energy_from_kerosene', :kbtus, :joules ],
|
230
|
-
[ 'annual_energy_from_electricity_for_clothes_driers', :kbtus, :joules ],
|
231
|
-
[ 'annual_energy_from_electricity_for_dishwashers', :kbtus, :joules ],
|
232
|
-
[ 'annual_energy_from_electricity_for_freezers', :kbtus, :joules ],
|
233
|
-
[ 'annual_energy_from_electricity_for_refrigerators', :kbtus, :joules ],
|
234
|
-
[ 'annual_energy_from_electricity_for_air_conditioners', :kbtus, :joules ],
|
235
|
-
[ 'annual_energy_from_electricity_for_heating_space', :kbtus, :joules ],
|
236
|
-
[ 'annual_energy_from_electricity_for_heating_water', :kbtus, :joules ],
|
237
|
-
[ 'annual_energy_from_electricity_for_other_appliances', :kbtus, :joules ],
|
238
|
-
].each do |attr_name, from_units, to_units|
|
239
|
-
update_all "#{attr_name} = #{attr_name} * #{Conversions::Unit.exchange_rate from_units, to_units}"
|
240
|
-
end
|
241
|
-
end
|
242
|
-
|
243
|
-
process 'Add a new field "rooms" that estimates how many rooms are in the house' do
|
244
|
-
update_all 'rooms = total_rooms + bathrooms/2 + halfbaths/4 + heated_garage*(attached_1car_garage + detached_1car_garage + 2*(attached_2car_garage + detached_2car_garage) + 3*(attached_3car_garage + detached_3car_garage))'
|
245
|
-
end
|
246
|
-
|
247
|
-
process 'Add a new field "lighting_use" that estimates how many hours light bulbs are turned on in the house' do
|
248
|
-
update_all 'lighting_use = 2*(lights_on_1_to_4_hours + efficient_lights_on_1_to_4_hours) + 8*(lights_on_4_to_12_hours + efficient_lights_on_4_to_12_hours) + 16*(lights_on_over_12_hours + efficient_lights_on_over_12_hours) + 12*(outdoor_all_night_lights + outdoor_all_night_gas_lights)'
|
249
|
-
end
|
250
|
-
|
251
|
-
process 'Add a new field "lighting_efficiency" that estimates what percentage of light bulbs in a house are energy-efficient' do
|
252
|
-
update_all 'lighting_efficiency = (2*efficient_lights_on_1_to_4_hours + 8*efficient_lights_on_4_to_12_hours + 16*efficient_lights_on_over_12_hours) / lighting_use'
|
253
|
-
end
|
254
|
-
end
|
255
|
-
end
|
256
|
-
|
257
|
-
# T-100 Segment (All Carriers): http://www.transtats.bts.gov/Fields.asp?Table_ID=293
|
258
|
-
class T100FlightSegment < ActiveRecord::Base
|
259
|
-
set_primary_key :row_hash
|
260
|
-
URL = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
|
261
|
-
FORM_DATA = %{
|
262
|
-
UserTableName=T_100_Segment__All_Carriers&
|
263
|
-
DBShortName=Air_Carriers&
|
264
|
-
RawDataTable=T_T100_SEGMENT_ALL_CARRIER&
|
265
|
-
sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D__MONTH_NUMBER__+AND+YEAR%3D__YEAR__&
|
266
|
-
varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&
|
267
|
-
grouplist=&
|
268
|
-
suml=&
|
269
|
-
sumRegion=&
|
270
|
-
filter1=title%3D&
|
271
|
-
filter2=title%3D&
|
272
|
-
geo=All%A0&
|
273
|
-
time=__MONTH_NAME__&
|
274
|
-
timename=Month&
|
275
|
-
GEOGRAPHY=All&
|
276
|
-
XYEAR=__YEAR__&
|
277
|
-
FREQUENCY=__MONTH_NUMBER__&
|
278
|
-
AllVars=All&
|
279
|
-
VarName=DEPARTURES_SCHEDULED&
|
280
|
-
VarDesc=DepScheduled&
|
281
|
-
VarType=Num&
|
282
|
-
VarName=DEPARTURES_PERFORMED&
|
283
|
-
VarDesc=DepPerformed&
|
284
|
-
VarType=Num&
|
285
|
-
VarName=PAYLOAD&
|
286
|
-
VarDesc=Payload&
|
287
|
-
VarType=Num&
|
288
|
-
VarName=SEATS&
|
289
|
-
VarDesc=Seats&
|
290
|
-
VarType=Num&
|
291
|
-
VarName=PASSENGERS&
|
292
|
-
VarDesc=Passengers&
|
293
|
-
VarType=Num&
|
294
|
-
VarName=FREIGHT&
|
295
|
-
VarDesc=Freight&
|
296
|
-
VarType=Num&
|
297
|
-
VarName=MAIL&
|
298
|
-
VarDesc=Mail&
|
299
|
-
VarType=Num&
|
300
|
-
VarName=DISTANCE&
|
301
|
-
VarDesc=Distance&
|
302
|
-
VarType=Num&
|
303
|
-
VarName=RAMP_TO_RAMP&
|
304
|
-
VarDesc=RampToRamp&
|
305
|
-
VarType=Num&
|
306
|
-
VarName=AIR_TIME&
|
307
|
-
VarDesc=AirTime&
|
308
|
-
VarType=Num&
|
309
|
-
VarName=UNIQUE_CARRIER&
|
310
|
-
VarDesc=UniqueCarrier&
|
311
|
-
VarType=Char&
|
312
|
-
VarName=AIRLINE_ID&
|
313
|
-
VarDesc=AirlineID&
|
314
|
-
VarType=Num&
|
315
|
-
VarName=UNIQUE_CARRIER_NAME&
|
316
|
-
VarDesc=UniqueCarrierName&
|
317
|
-
VarType=Char&
|
318
|
-
VarName=UNIQUE_CARRIER_ENTITY&
|
319
|
-
VarDesc=UniqCarrierEntity&
|
320
|
-
VarType=Char&
|
321
|
-
VarName=REGION&
|
322
|
-
VarDesc=CarrierRegion&
|
323
|
-
VarType=Char&
|
324
|
-
VarName=CARRIER&
|
325
|
-
VarDesc=Carrier&
|
326
|
-
VarType=Char&
|
327
|
-
VarName=CARRIER_NAME&
|
328
|
-
VarDesc=CarrierName&
|
329
|
-
VarType=Char&
|
330
|
-
VarName=CARRIER_GROUP&
|
331
|
-
VarDesc=CarrierGroup&
|
332
|
-
VarType=Num&
|
333
|
-
VarName=CARRIER_GROUP_NEW&
|
334
|
-
VarDesc=CarrierGroupNew&
|
335
|
-
VarType=Num&
|
336
|
-
VarName=ORIGIN&
|
337
|
-
VarDesc=Origin&
|
338
|
-
VarType=Char&
|
339
|
-
VarName=ORIGIN_CITY_NAME&
|
340
|
-
VarDesc=OriginCityName&
|
341
|
-
VarType=Char&
|
342
|
-
VarName=ORIGIN_CITY_NUM&
|
343
|
-
VarDesc=OriginCityNum&
|
344
|
-
VarType=Num&
|
345
|
-
VarName=ORIGIN_STATE_ABR&
|
346
|
-
VarDesc=OriginState&
|
347
|
-
VarType=Char&
|
348
|
-
VarName=ORIGIN_STATE_FIPS&
|
349
|
-
VarDesc=OriginStateFips&
|
350
|
-
VarType=Char&
|
351
|
-
VarName=ORIGIN_STATE_NM&
|
352
|
-
VarDesc=OriginStateName&
|
353
|
-
VarType=Char&
|
354
|
-
VarName=ORIGIN_COUNTRY&
|
355
|
-
VarDesc=OriginCountry&
|
356
|
-
VarType=Char&
|
357
|
-
VarName=ORIGIN_COUNTRY_NAME&
|
358
|
-
VarDesc=OriginCountryName&
|
359
|
-
VarType=Char&
|
360
|
-
VarName=ORIGIN_WAC&
|
361
|
-
VarDesc=OriginWac&
|
362
|
-
VarType=Num&
|
363
|
-
VarName=DEST&
|
364
|
-
VarDesc=Dest&
|
365
|
-
VarType=Char&
|
366
|
-
VarName=DEST_CITY_NAME&
|
367
|
-
VarDesc=DestCityName&
|
368
|
-
VarType=Char&
|
369
|
-
VarName=DEST_CITY_NUM&
|
370
|
-
VarDesc=DestCityNum&
|
371
|
-
VarType=Num&
|
372
|
-
VarName=DEST_STATE_ABR&
|
373
|
-
VarDesc=DestState&
|
374
|
-
VarType=Char&
|
375
|
-
VarName=DEST_STATE_FIPS&
|
376
|
-
VarDesc=DestStateFips&
|
377
|
-
VarType=Char&
|
378
|
-
VarName=DEST_STATE_NM&
|
379
|
-
VarDesc=DestStateName&
|
380
|
-
VarType=Char&
|
381
|
-
VarName=DEST_COUNTRY&
|
382
|
-
VarDesc=DestCountry&
|
383
|
-
VarType=Char&
|
384
|
-
VarName=DEST_COUNTRY_NAME&
|
385
|
-
VarDesc=DestCountryName&
|
386
|
-
VarType=Char&
|
387
|
-
VarName=DEST_WAC&
|
388
|
-
VarDesc=DestWac&
|
389
|
-
VarType=Num&
|
390
|
-
VarName=AIRCRAFT_GROUP&
|
391
|
-
VarDesc=AircraftGroup&
|
392
|
-
VarType=Num&
|
393
|
-
VarName=AIRCRAFT_TYPE&
|
394
|
-
VarDesc=AircraftType&
|
395
|
-
VarType=Char&
|
396
|
-
VarName=AIRCRAFT_CONFIG&
|
397
|
-
VarDesc=AircraftConfig&
|
398
|
-
VarType=Num&
|
399
|
-
VarName=YEAR&
|
400
|
-
VarDesc=Year&
|
401
|
-
VarType=Num&
|
402
|
-
VarName=QUARTER&
|
403
|
-
VarDesc=Quarter&
|
404
|
-
VarType=Num&
|
405
|
-
VarName=MONTH&
|
406
|
-
VarDesc=Month&
|
407
|
-
VarType=Num&
|
408
|
-
VarName=DISTANCE_GROUP&
|
409
|
-
VarDesc=DistanceGroup&
|
410
|
-
VarType=Num&
|
411
|
-
VarName=CLASS&
|
412
|
-
VarDesc=Class&
|
413
|
-
VarType=Char&
|
414
|
-
VarName=DATA_SOURCE&
|
415
|
-
VarDesc=DataSource&
|
416
|
-
VarType=Char
|
417
|
-
}.gsub /[\s]+/,''
|
418
|
-
|
419
|
-
data_miner do
|
420
|
-
months = Hash.new
|
421
|
-
# (2008..2009).each do |year|
|
422
|
-
(2008..2008).each do |year|
|
423
|
-
# (1..12).each do |month|
|
424
|
-
(1..1).each do |month|
|
425
|
-
time = Time.gm year, month
|
426
|
-
form_data = FORM_DATA.dup
|
427
|
-
form_data.gsub! '__YEAR__', time.year.to_s
|
428
|
-
form_data.gsub! '__MONTH_NUMBER__', time.month.to_s
|
429
|
-
form_data.gsub! '__MONTH_NAME__', time.strftime('%B')
|
430
|
-
months[time] = form_data
|
431
|
-
end
|
432
|
-
end
|
433
|
-
months.each do |month, form_data|
|
434
|
-
import "T100 data from #{month.strftime('%B %Y')}",
|
435
|
-
:url => URL,
|
436
|
-
:form_data => form_data,
|
437
|
-
:compression => :zip,
|
438
|
-
:glob => '/*.csv' do
|
439
|
-
key 'row_hash'
|
440
|
-
store 'departures_scheduled', :field_name => 'DEPARTURES_SCHEDULED'
|
441
|
-
store 'departures_performed', :field_name => 'DEPARTURES_PERFORMED'
|
442
|
-
store 'payload', :field_name => 'PAYLOAD', :from_units => :pounds, :to_units => :kilograms
|
443
|
-
store 'seats', :field_name => 'SEATS'
|
444
|
-
store 'passengers', :field_name => 'PASSENGERS'
|
445
|
-
store 'freight', :field_name => 'FREIGHT', :from_units => :pounds, :to_units => :kilograms
|
446
|
-
store 'mail', :field_name => 'MAIL', :from_units => :pounds, :to_units => :kilograms
|
447
|
-
store 'distance', :field_name => 'DISTANCE', :from_units => :miles, :to_units => :kilometres
|
448
|
-
store 'ramp_to_ramp', :field_name => 'RAMP_TO_RAMP'
|
449
|
-
store 'air_time', :field_name => 'AIR_TIME'
|
450
|
-
store 'unique_carrier', :field_name => 'UNIQUE_CARRIER'
|
451
|
-
store 'dot_airline_id', :field_name => 'AIRLINE_ID'
|
452
|
-
store 'unique_carrier_name', :field_name => 'UNIQUE_CARRIER_NAME'
|
453
|
-
store 'unique_carrier_entity', :field_name => 'UNIQUE_CARRIER_ENTITY'
|
454
|
-
store 'region', :field_name => 'REGION'
|
455
|
-
store 'carrier', :field_name => 'CARRIER'
|
456
|
-
store 'carrier_name', :field_name => 'CARRIER_NAME'
|
457
|
-
store 'carrier_group', :field_name => 'CARRIER_GROUP'
|
458
|
-
store 'carrier_group_new', :field_name => 'CARRIER_GROUP_NEW'
|
459
|
-
store 'origin_airport_iata', :field_name => 'ORIGIN'
|
460
|
-
store 'origin_city_name', :field_name => 'ORIGIN_CITY_NAME'
|
461
|
-
store 'origin_city_num', :field_name => 'ORIGIN_CITY_NUM'
|
462
|
-
store 'origin_state_abr', :field_name => 'ORIGIN_STATE_ABR'
|
463
|
-
store 'origin_state_fips', :field_name => 'ORIGIN_STATE_FIPS'
|
464
|
-
store 'origin_state_nm', :field_name => 'ORIGIN_STATE_NM'
|
465
|
-
store 'origin_country_iso_3166', :field_name => 'ORIGIN_COUNTRY'
|
466
|
-
store 'origin_country_name', :field_name => 'ORIGIN_COUNTRY_NAME'
|
467
|
-
store 'origin_wac', :field_name => 'ORIGIN_WAC'
|
468
|
-
store 'dest_airport_iata', :field_name => 'DEST'
|
469
|
-
store 'dest_city_name', :field_name => 'DEST_CITY_NAME'
|
470
|
-
store 'dest_city_num', :field_name => 'DEST_CITY_NUM'
|
471
|
-
store 'dest_state_abr', :field_name => 'DEST_STATE_ABR'
|
472
|
-
store 'dest_state_fips', :field_name => 'DEST_STATE_FIPS'
|
473
|
-
store 'dest_state_nm', :field_name => 'DEST_STATE_NM'
|
474
|
-
store 'dest_country_iso_3166', :field_name => 'DEST_COUNTRY'
|
475
|
-
store 'dest_country_name', :field_name => 'DEST_COUNTRY_NAME'
|
476
|
-
store 'dest_wac', :field_name => 'DEST_WAC'
|
477
|
-
store 'bts_aircraft_group', :field_name => 'AIRCRAFT_GROUP'
|
478
|
-
store 'bts_aircraft_type', :field_name => 'AIRCRAFT_TYPE'
|
479
|
-
store 'bts_aircraft_config', :field_name => 'AIRCRAFT_CONFIG'
|
480
|
-
store 'year', :field_name => 'YEAR'
|
481
|
-
store 'quarter', :field_name => 'QUARTER'
|
482
|
-
store 'month', :field_name => 'MONTH'
|
483
|
-
store 'bts_distance_group', :field_name => 'DISTANCE_GROUP'
|
484
|
-
store 'bts_service_class', :field_name => 'CLASS'
|
485
|
-
store 'data_source', :field_name => 'DATA_SOURCE'
|
486
|
-
end
|
487
|
-
end
|
488
|
-
|
489
|
-
process 'Derive freight share as a fraction of payload' do
|
490
|
-
update_all 'freight_share = (freight + mail) / payload', 'payload > 0'
|
491
|
-
end
|
492
|
-
|
493
|
-
process 'Derive load factor, which is passengers divided by the total seats available' do
|
494
|
-
update_all 'load_factor = passengers / seats', 'passengers <= seats'
|
495
|
-
end
|
496
|
-
|
497
|
-
process 'Derive average seats per departure' do
|
498
|
-
update_all 'seats_per_departure = seats / departures_performed', 'departures_performed > 0'
|
499
|
-
end
|
500
|
-
end
|
501
|
-
end
|
502
|
-
|
503
|
-
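# Note: AircraftDeux depends on the matcher classes namespaced under Aircraft
# (Aircraft::BtsNameMatcher and Aircraft::BtsAircraftTypeCodeMatcher).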
|
504
|
-
# Fixture model keyed by ICAO code. Its import reuses the matcher classes
# namespaced under Aircraft (Aircraft::BtsNameMatcher and
# Aircraft::BtsAircraftTypeCodeMatcher).
class AircraftDeux < ActiveRecord::Base
  set_primary_key :icao_code

  # Errata predicates. They are defined on the class itself because the
  # errata in the import below is declared with the shorthand form (only a
  # :url is given, no explicit responder).
  #
  # Every predicate receives one source row, read by column name
  # (e.g. row['Manufacturer']), and returns true or false.
  class << self
    # True unless the Manufacturer column mentions AEROSPATIALE (case-insensitive).
    def is_not_attributed_to_aerospatiale?(row)
      row['Manufacturer'] !~ /AEROSPATIALE/i
    end

    # True unless the Manufacturer column mentions CESSNA (case-insensitive).
    def is_not_attributed_to_cessna?(row)
      row['Manufacturer'] !~ /CESSNA/i
    end

    # True unless the Manufacturer column mentions LEAR (case-insensitive).
    def is_not_attributed_to_learjet?(row)
      row['Manufacturer'] !~ /LEAR/i
    end

    # True unless the Manufacturer column mentions DE HAVILLAND or DEHAVILLAND.
    def is_not_attributed_to_dehavilland?(row)
      row['Manufacturer'] !~ /DE ?HAVILLAND/i
    end

    # True unless the Manufacturer column mentions MCDONNELL DOUGLAS.
    def is_not_attributed_to_mcdonnell_douglas?(row)
      row['Manufacturer'] !~ /MCDONNELL DOUGLAS/i
    end

    # True unless the Model column contains "DC" (case-insensitive).
    def is_not_a_dc_plane?(row)
      row['Model'] !~ /DC/i
    end

    # True when the Designator column is "crj9" in any letter case.
    # to_s keeps a row without a Designator cell (nil) from raising
    # NoMethodError; such a row simply is not a CRJ-900.
    def is_a_crj_900?(row)
      row['Designator'].to_s.downcase == 'crj9'
    end
  end

  data_miner do
    # ('A'..'Z').each do |letter|
    # Note: for the purposes of testing, only importing "D"
    %w{ D }.each do |letter|
      import("ICAO codes starting with letter #{letter} used by the FAA",
             :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
             :encoding => 'windows-1252',
             :errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw' },
             :row_xpath => '//table/tr[2]/td/table/tr',
             :column_xpath => 'td') do
        key 'icao_code', :field_name => 'Designator'
        store 'bts_name', :matcher => Aircraft::BtsNameMatcher.new
        store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher.new
        store 'manufacturer_name', :field_name => 'Manufacturer'
        store 'name', :field_name => 'Model'
      end
    end
  end
end
|
557
|
-
|
558
|
-
# Fixture model keyed by a synthesized name. Its data_miner script contains
# two process steps that raise DataMiner::Finish / DataMiner::Skip when the
# globals $force_finish / $force_skip are set.
class AutomobileMakeFleetYear < ActiveRecord::Base
  set_primary_key(:name)

  col(:name)
  col(:make_name)
  col(:fleet)
  col(:year, :type => :integer)
  col(:fuel_efficiency, :type => :float)
  col(:fuel_efficiency_units)
  col(:volume, :type => :integer)
  col(:make_year_name)
  col(:created_at, :type => :datetime)
  col(:updated_at, :type => :datetime)

  data_miner do
    process(:auto_upgrade!)

    process("finish if i tell you to") do
      if $force_finish
        raise DataMiner::Finish
      end
    end

    process("skip if i tell you to") do
      if $force_skip
        raise DataMiner::Skip
      end
    end

    # Only rows whose volume parses to a positive integer are imported.
    has_volume = lambda { |row| row['volume'].to_i > 0 }

    # The key joins manufacturer, two characters of the fleet column
    # (starting at zero-based index 2) and the year, separated by spaces.
    synthesized_name = lambda do |row|
      parts = [row['manufacturer_name'], row['fleet'][2, 2], row['year_content']]
      parts.join(' ')
    end

    # CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA
    import(:url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/make_fleet_years.csv',
           :errata => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv' },
           :select => has_volume) do
      key('name', :synthesize => synthesized_name)
      store('make_name', :field_name => 'manufacturer_name')
      store('year', :field_name => 'year_content')
      store('fleet', :chars => 2..3) # zero-based
      store('fuel_efficiency', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre)
      store('volume')
    end
  end
end
|
596
|
-
|
597
|
-
# Fixture model with an explicit primary key, a named single-column index and
# a composite index; its only data_miner step is :auto_upgrade!.
class CensusDivisionTrois < ActiveRecord::Base
  set_primary_key(:number_code)

  col(:number_code)
  col(:name)
  col(:census_region_name)
  col(:census_region_number, :type => :integer)

  # Single-column index with an explicit name.
  add_index('census_region_name', :name => 'homefry')
  # Composite index across all four columns.
  add_index(%w{ number_code name census_region_name census_region_number })

  data_miner do
    process(:auto_upgrade!)
  end
end
|
611
|
-
|
612
|
-
# Fixture model that declares no explicit primary key and a single named
# index; its only data_miner step is :auto_upgrade!.
class CensusDivisionFour < ActiveRecord::Base
  col(:number_code)
  col(:name)
  col(:census_region_name)
  col(:census_region_number, :type => :integer)

  # Single-column index with an explicit name.
  add_index('census_region_name', :name => 'homefry')

  data_miner do
    process(:auto_upgrade!)
  end
end
|
623
|
-
|
624
|
-
# TODO: organize these tests into coherent groups.
|
625
|
-
# Tests for the old data_miner syntax.
#
# Groups of tests are switched on through environment variables:
#   WIP          - work-in-progress tests
#   ALL=true     - every group
#   FAST=true    - the fast group
#   SLOW=true    - the slow group
class TestOldSyntax < Test::Unit::TestCase
  if ENV['WIP']
    context 'with nullify option' do
      should 'treat blank fields as null values' do
        Aircraft.delete_all
        Aircraft.data_miner_runs.delete_all
        Aircraft.run_data_miner!
        assert_greater_than 0, Aircraft.count
        assert_false Aircraft.where(:brighter_planet_aircraft_class_code => nil).empty?
      end
    end
  end

  if ENV['ALL'] == 'true'
    should 'directly create a table for the model' do
      if AutomobileMakeFleetYear.table_exists?
        ActiveRecord::Base.connection.execute 'DROP TABLE automobile_make_fleet_years;'
      end
      AutomobileMakeFleetYear.auto_upgrade!
      assert AutomobileMakeFleetYear.table_exists?
    end
  end

  if ENV['ALL'] == 'true' || ENV['FAST'] == 'true'
    should 'append to an existing config' do
      AutomobileFuelType.class_eval do
        data_miner :append => true do
          import 'example1', :url => 'http://example1.com' do
            key 'code'
            store 'name'
          end
        end
        data_miner :append => true do
          import 'example2', :url => 'http://example2.com' do
            key 'code'
            store 'name'
          end
        end
      end
      assert_equal 'http://example1.com', AutomobileFuelType.data_miner_config.steps[-2].table.url
      assert_equal 'http://example2.com', AutomobileFuelType.data_miner_config.steps[-1].table.url
    end

    should 'override an existing data_miner configuration' do
      AutomobileFuelType.class_eval do
        data_miner do
          import 'example', :url => 'http://example.com' do
            key 'code'
            store 'name'
          end
        end
      end
      assert_kind_of DataMiner::Import, AutomobileFuelType.data_miner_config.steps.first
      assert_equal 'http://example.com', AutomobileFuelType.data_miner_config.steps.first.table.url
    end

    should "stop and finish if it gets a DataMiner::Finish" do
      AutomobileMakeFleetYear.delete_all
      AutomobileMakeFleetYear.data_miner_runs.delete_all
      begin
        $force_finish = true
        AutomobileMakeFleetYear.run_data_miner!
        assert_equal 0, AutomobileMakeFleetYear.count
        assert(AutomobileMakeFleetYear.data_miner_runs.count > 0)
        assert(AutomobileMakeFleetYear.data_miner_runs.all? { |run| run.finished? && !run.skipped? && !run.killed? })
      ensure
        # Always clear the global so a failed assertion above cannot make
        # later tests finish early as well.
        $force_finish = false
      end
      AutomobileMakeFleetYear.run_data_miner!
      assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
    end

    should "stop and register skipped if it gets a DataMiner::Skip" do
      AutomobileMakeFleetYear.delete_all
      AutomobileMakeFleetYear.data_miner_runs.delete_all
      begin
        $force_skip = true
        AutomobileMakeFleetYear.run_data_miner!
        assert_equal 0, AutomobileMakeFleetYear.count
        assert(AutomobileMakeFleetYear.data_miner_runs.count > 0)
        assert(AutomobileMakeFleetYear.data_miner_runs.all? { |run| run.skipped? && !run.finished? && !run.killed? })
      ensure
        # Always clear the global so a failed assertion above cannot make
        # later tests skip as well.
        $force_skip = false
      end
      AutomobileMakeFleetYear.run_data_miner!
      assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
    end

    should "allow specifying dictionaries explicitly" do
      CensusDivisionDeux.run_data_miner!
      assert_equal 'South Region', CensusDivisionDeux.find(5).census_region_name
    end

    should "be able to key on things other than the primary key" do
      Aircraft.run_data_miner!
      assert_equal 'SP', Aircraft.find('DHC6').brighter_planet_aircraft_class_code
    end

    should "be able to synthesize rows without using a full parser class" do
      AutomobileMakeFleetYear.run_data_miner!
      assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
    end

    should "keep a call stack so that you can call run_data_miner! on a child" do
      CrosscallingCensusDivision.run_data_miner!
      assert CrosscallingCensusDivision.exists?(:name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region')
      assert CrosscallingCensusRegion.exists?(:name => 'West Region', :number => 4)
    end

    should "keep a call stack so that you can call run_data_miner! on a parent" do
      CrosscallingCensusRegion.run_data_miner!
      assert CrosscallingCensusDivision.exists?(:name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region')
      assert CrosscallingCensusRegion.exists?(:name => 'West Region', :number => 4)
    end

    should "import airports" do
      Airport.run_data_miner!
      assert(Airport.count > 0)
    end

    should "pull in census divisions using a data.brighterplanet.com dictionary" do
      CensusDivision.run_data_miner!
      assert(CensusDivision.count > 0)
    end

    should "have a way to queue up runs that works with delated_job's send_later" do
      assert AutomobileVariant.respond_to?(:run_data_miner!)
    end

    should "be idempotent" do
      Country.data_miner_config.run
      a = Country.count
      Country.data_miner_config.run
      b = Country.count
      assert_equal a, b

      CensusRegion.data_miner_config.run
      a = CensusRegion.count
      CensusRegion.data_miner_config.run
      b = CensusRegion.count
      assert_equal a, b
    end

    should "hash things" do
      AutomobileVariant.data_miner_config.steps[0].run
      assert AutomobileVariant.first.row_hash.present?
    end

    should "process a callback block instead of a method" do
      AutomobileVariant.delete_all
      AutomobileVariant.data_miner_config.steps[0].run
      assert !AutomobileVariant.first.fuel_efficiency_city.present?
      AutomobileVariant.data_miner_config.steps.last.run
      assert AutomobileVariant.first.fuel_efficiency_city.present?
    end

    should "keep a log when it does a run" do
      approx_started_at = Time.now
      DataMiner.run :resource_names => %w{ Country }
      approx_terminated_at = Time.now
      last_run = DataMiner::Run.first(:conditions => { :resource_name => 'Country' }, :order => 'id DESC')
      assert((last_run.started_at - approx_started_at).abs < 5) # seconds
      assert((last_run.terminated_at - approx_terminated_at).abs < 5) # seconds
    end

    should "request a re-import from scratch" do
      c = Country.new
      c.iso_3166 = 'JUNK'
      c.save!
      assert Country.exists?(:iso_3166 => 'JUNK')
      DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
      assert !Country.exists?(:iso_3166 => 'JUNK')
    end

    should "know what runs were on a resource" do
      DataMiner.run :resource_names => %w{ Country }
      DataMiner.run :resource_names => %w{ Country }
      assert(Country.data_miner_runs.count > 0)
    end
  end

  if ENV['ALL'] == 'true' || ENV['SLOW'] == 'true'
    should "allow errata to be specified with a shorthand, assuming the responder is the resource class itself" do
      AircraftDeux.run_data_miner!
      assert AircraftDeux.exists?(:icao_code => 'DC91', :bts_aircraft_type_code => '630')
    end

    should "mine aircraft" do
      Aircraft.run_data_miner!
      assert Aircraft.exists?(:icao_code => 'DC91', :bts_aircraft_type_code => '630')
    end

    should "mine automobile variants" do
      AutomobileVariant.run_data_miner!
      # A lone string passed to count is used as the counted column expression,
      # not as a WHERE condition, so filter with where before counting.
      assert(AutomobileVariant.where('make_name LIKE ?', '%tesla').count > 0)
    end

    should "mine T100 flight segments" do
      T100FlightSegment.run_data_miner!
      # Same as above: the LIKE condition must go through where, not count.
      assert(T100FlightSegment.where('dest_country_name LIKE ?', '%United States').count > 0)
    end

    should "mine residence survey responses" do
      ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
      assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.start_with?('Single-family detached house')
    end
  end
end
|