data_miner 1.3.8 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/CHANGELOG +42 -0
  2. data/Gemfile +19 -3
  3. data/README.rdoc +3 -3
  4. data/Rakefile +13 -15
  5. data/data_miner.gemspec +4 -15
  6. data/lib/data_miner.rb +69 -70
  7. data/lib/data_miner/active_record_extensions.rb +17 -22
  8. data/lib/data_miner/attribute.rb +176 -179
  9. data/lib/data_miner/dictionary.rb +38 -31
  10. data/lib/data_miner/run.rb +49 -18
  11. data/lib/data_miner/script.rb +116 -0
  12. data/lib/data_miner/step.rb +5 -0
  13. data/lib/data_miner/step/import.rb +74 -0
  14. data/lib/data_miner/step/process.rb +34 -0
  15. data/lib/data_miner/step/tap.rb +134 -0
  16. data/lib/data_miner/version.rb +1 -1
  17. data/test/helper.rb +26 -24
  18. data/test/support/breeds.xls +0 -0
  19. data/test/support/pet_color_dictionary.en.csv +5 -0
  20. data/test/support/pet_color_dictionary.es.csv +5 -0
  21. data/test/support/pets.csv +5 -0
  22. data/test/support/pets_funny.csv +4 -0
  23. data/test/test_data_miner.rb +103 -0
  24. data/test/test_earth_import.rb +25 -0
  25. data/test/test_earth_tap.rb +25 -0
  26. data/test/test_safety.rb +43 -0
  27. metadata +72 -78
  28. data/.document +0 -5
  29. data/lib/data_miner/config.rb +0 -124
  30. data/lib/data_miner/import.rb +0 -93
  31. data/lib/data_miner/process.rb +0 -38
  32. data/lib/data_miner/tap.rb +0 -143
  33. data/test/support/aircraft.rb +0 -102
  34. data/test/support/airport.rb +0 -16
  35. data/test/support/automobile_fuel_type.rb +0 -40
  36. data/test/support/automobile_variant.rb +0 -362
  37. data/test/support/country.rb +0 -15
  38. data/test/support/test_database.rb +0 -311
  39. data/test/test_data_miner_attribute.rb +0 -111
  40. data/test/test_data_miner_process.rb +0 -18
  41. data/test/test_old_syntax.rb +0 -825
  42. data/test/test_tap.rb +0 -21
@@ -1,111 +0,0 @@
1
- $:.push File.dirname(__FILE__)
2
- require 'helper'
3
-
4
- TestDatabase.load_models
5
-
6
- class TestDataMinerAttribute < Test::Unit::TestCase
7
- context '#value_from_row' do
8
- setup do
9
- @airport = Airport.new
10
- end
11
- context 'nullify is true' do
12
- setup do
13
- @attribute = DataMiner::Attribute.new @airport, 'latitude', :nullify => true
14
- end
15
- should 'return nil if field is blank' do
16
- assert_nil @attribute.value_from_row(
17
- 'name' => 'DTW',
18
- 'city' => 'Warren',
19
- 'country_name' => 'US',
20
- 'latitude' => '',
21
- 'longitude' => ''
22
- )
23
- end
24
- should 'return the value if field is not blank' do
25
- assert_equal '12.34', @attribute.value_from_row(
26
- 'name' => 'DTW',
27
- 'city' => 'Warren',
28
- 'country_name' => 'US',
29
- 'latitude' => '12.34',
30
- 'longitude' => ''
31
- )
32
- end
33
- end
34
- context 'nullify is false' do
35
- setup do
36
- @attribute = DataMiner::Attribute.new @airport, 'latitude'
37
- end
38
- should 'return the value if field is not blank' do
39
- assert_equal '12.34', @attribute.value_from_row(
40
- 'name' => 'DTW',
41
- 'city' => 'Warren',
42
- 'country_name' => 'US',
43
- 'latitude' => '12.34',
44
- 'longitude' => ''
45
- )
46
- end
47
- should 'return blank if field is blank' do
48
- assert_equal '', @attribute.value_from_row(
49
- 'name' => 'DTW',
50
- 'city' => 'Warren',
51
- 'country_name' => 'US',
52
- 'latitude' => '',
53
- 'longitude' => ''
54
- )
55
- end
56
- end
57
- end
58
-
59
- context '#set_record_from_row' do
60
- setup do
61
- @automobile_fuel_type = AutomobileFuelType.new
62
- end
63
- context 'nullify is true, wants units' do
64
- setup do
65
- @attribute = DataMiner::Attribute.new @automobile_fuel_type, 'annual_distance', :nullify => true, :units_field_name => 'annual_distance_units'
66
- end
67
- should 'set value and units to nil if field is blank' do
68
- @attribute.set_record_from_row(@automobile_fuel_type,
69
- 'name' => 'electricity',
70
- 'annual_distance' => '',
71
- 'annual_distance_units' => ''
72
- )
73
- assert_nil @automobile_fuel_type.annual_distance
74
- assert_nil @automobile_fuel_type.annual_distance_units
75
- end
76
- should 'set value and units if field is not blank' do
77
- @attribute.set_record_from_row(@automobile_fuel_type,
78
- 'name' => 'electricity',
79
- 'annual_distance' => '100.0',
80
- 'annual_distance_units' => 'kilometres'
81
- )
82
- assert_equal 100.0, @automobile_fuel_type.annual_distance
83
- assert_equal 'kilometres', @automobile_fuel_type.annual_distance_units
84
- end
85
- end
86
-
87
- context 'nullify is false, wants units' do
88
- setup do
89
- @attribute = DataMiner::Attribute.new @automobile_fuel_type, 'annual_distance', :units_field_name => 'annual_distance_units'
90
- end
91
- should 'set value and units to blank if field is blank' do
92
- @attribute.set_record_from_row(@automobile_fuel_type,
93
- 'name' => 'electricity',
94
- 'annual_distance' => '',
95
- 'annual_distance_units' => ''
96
- )
97
- assert_equal 0.0, @automobile_fuel_type.annual_distance
98
- assert_equal '', @automobile_fuel_type.annual_distance_units
99
- end
100
- should 'set value and units if field is not blank' do
101
- @attribute.set_record_from_row(@automobile_fuel_type,
102
- 'name' => 'electricity',
103
- 'annual_distance' => '100.0',
104
- 'annual_distance_units' => 'kilometres'
105
- )
106
- assert_equal 100.0, @automobile_fuel_type.annual_distance
107
- assert_equal 'kilometres', @automobile_fuel_type.annual_distance_units
108
- end
109
- end
110
- end
111
- end
@@ -1,18 +0,0 @@
1
- $:.push File.dirname(__FILE__)
2
- require 'helper'
3
-
4
- class TestDataMinerProcess < Test::Unit::TestCase
5
- context '#inspect' do
6
- setup do
7
- @config = DataMiner::Config.new 'foo'
8
- end
9
- should 'describe a block' do
10
- process = DataMiner::Process.new(@config, 'something cool') { }
11
- assert_match /something cool/, process.inspect
12
- end
13
- should 'describe a method' do
14
- process = DataMiner::Process.new @config, :something_cool
15
- assert_match /something_cool/, process.inspect
16
- end
17
- end
18
- end
@@ -1,825 +0,0 @@
1
- $:.push File.dirname(__FILE__)
2
- require 'helper'
3
-
4
- TestDatabase.load_models
5
-
6
- class CensusRegion < ActiveRecord::Base
7
- set_primary_key :number
8
-
9
- data_miner do
10
- import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
11
- key 'number', :field_name => 'Region'
12
- store 'name', :field_name => 'Name'
13
- end
14
-
15
- # pretend this is a different data source
16
- # fake! just for testing purposes
17
- import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
18
- key 'number', :field_name => 'Region'
19
- store 'name', :field_name => 'Name'
20
- end
21
- end
22
- end
23
-
24
- # smaller than a region
25
- class CensusDivision < ActiveRecord::Base
26
- set_primary_key :number
27
-
28
- data_miner do
29
- import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
30
- key 'number', :field_name => 'Division'
31
- store 'name', :field_name => 'Name'
32
- store 'census_region_number', :field_name => 'Region'
33
- store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
34
- end
35
- end
36
- end
37
-
38
- class CensusDivisionDeux < ActiveRecord::Base
39
- set_primary_key :number
40
-
41
- data_miner do
42
- import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
43
- key 'number', :field_name => 'Division'
44
- store 'name', :field_name => 'Name'
45
- store 'census_region_number', :field_name => 'Region'
46
- store 'census_region_name', :field_name => 'Region', :dictionary => DataMiner::Dictionary.new(:input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv')
47
- end
48
- end
49
- end
50
-
51
- class CrosscallingCensusRegion < ActiveRecord::Base
52
- set_primary_key :number
53
-
54
- has_many :crosscalling_census_divisions
55
-
56
- data_miner do
57
- process "derive ourselves from the census divisions table (i.e., cross call census divisions)" do
58
- CrosscallingCensusDivision.run_data_miner!
59
- connection.create_table :crosscalling_census_regions, :options => 'ENGINE=InnoDB default charset=utf8', :id => false, :force => true do |t|
60
- t.column :number, :integer
61
- t.column :name, :string
62
- end
63
- connection.execute 'ALTER TABLE crosscalling_census_regions ADD PRIMARY KEY (number);'
64
- connection.execute %{
65
- INSERT IGNORE INTO crosscalling_census_regions(number, name)
66
- SELECT crosscalling_census_divisions.census_region_number, crosscalling_census_divisions.census_region_name FROM crosscalling_census_divisions
67
- }
68
- end
69
- end
70
- end
71
-
72
- class CrosscallingCensusDivision < ActiveRecord::Base
73
- set_primary_key :number
74
-
75
- belongs_to :crosscalling_census_regions, :foreign_key => 'census_region_number'
76
-
77
- data_miner do
78
- import "get a list of census divisions and their regions", :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
79
- key 'number', :field_name => 'Division'
80
- store 'name', :field_name => 'Name'
81
- store 'census_region_number', :field_name => 'Region'
82
- store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
83
- end
84
-
85
- process "make sure my parent object is set up (i.e., cross-call it)" do
86
- CrosscallingCensusRegion.run_data_miner!
87
- end
88
- end
89
- end
90
-
91
- class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
92
- set_primary_key :department_of_energy_identifier
93
-
94
- data_miner do
95
- process 'Define some unit conversions' do
96
- Conversions.register :kbtus, :joules, 1_000.0 * 1_055.05585
97
- Conversions.register :square_feet, :square_metres, 0.09290304
98
- end
99
-
100
- # conversions are NOT performed here, since we first have to zero out legitimate skips
101
- # otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
102
- import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv' do
103
- key 'department_of_energy_identifier', :field_name => 'DOEID'
104
-
105
- store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
106
- store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
107
- store 'construction_period', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
108
- store 'urbanity', :field_name => 'URBRUR', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/urbrur/urbrur.csv' }
109
- store 'dishwasher_use', :field_name => 'DWASHUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dwashuse/dwashuse.csv' }
110
- store 'central_ac_use', :field_name => 'USECENAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usecenac/usecenac.csv' }
111
- store 'window_ac_use', :field_name => 'USEWWAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usewwac/usewwac.csv' }
112
- store 'clothes_washer_use', :field_name => 'WASHLOAD', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/washload/washload.csv' }
113
- store 'clothes_dryer_use', :field_name => 'DRYRUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dryruse/dryruse.csv' }
114
-
115
- store 'census_division_number', :field_name => 'DIVISION'
116
- store 'census_division_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
117
- store 'census_region_number', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_number', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
118
- store 'census_region_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
119
-
120
- store 'floorspace', :field_name => 'TOTSQFT'
121
- store 'residents', :field_name => 'NHSLDMEM'
122
- store 'ownership', :field_name => 'KOWNRENT'
123
- store 'thermostat_programmability', :field_name => 'PROTHERM'
124
- store 'refrigerator_count', :field_name => 'NUMFRIG'
125
- store 'freezer_count', :field_name => 'NUMFREEZ'
126
- store 'heating_degree_days', :field_name => 'HD65'
127
- store 'cooling_degree_days', :field_name => 'CD65'
128
- store 'annual_energy_from_fuel_oil_for_heating_space', :field_name => 'BTUFOSPH'
129
- store 'annual_energy_from_fuel_oil_for_heating_water', :field_name => 'BTUFOWTH'
130
- store 'annual_energy_from_fuel_oil_for_appliances', :field_name => 'BTUFOAPL'
131
- store 'annual_energy_from_natural_gas_for_heating_space', :field_name => 'BTUNGSPH'
132
- store 'annual_energy_from_natural_gas_for_heating_water', :field_name => 'BTUNGWTH'
133
- store 'annual_energy_from_natural_gas_for_appliances', :field_name => 'BTUNGAPL'
134
- store 'annual_energy_from_propane_for_heating_space', :field_name => 'BTULPSPH'
135
- store 'annual_energy_from_propane_for_heating_water', :field_name => 'BTULPWTH'
136
- store 'annual_energy_from_propane_for_appliances', :field_name => 'BTULPAPL'
137
- store 'annual_energy_from_wood', :field_name => 'BTUWOOD'
138
- store 'annual_energy_from_kerosene', :field_name => 'BTUKER'
139
- store 'annual_energy_from_electricity_for_clothes_driers', :field_name => 'BTUELCDR'
140
- store 'annual_energy_from_electricity_for_dishwashers', :field_name => 'BTUELDWH'
141
- store 'annual_energy_from_electricity_for_freezers', :field_name => 'BTUELFZZ'
142
- store 'annual_energy_from_electricity_for_refrigerators', :field_name => 'BTUELRFG'
143
- store 'annual_energy_from_electricity_for_air_conditioners', :field_name => 'BTUELCOL'
144
- store 'annual_energy_from_electricity_for_heating_space', :field_name => 'BTUELSPH'
145
- store 'annual_energy_from_electricity_for_heating_water', :field_name => 'BTUELWTH'
146
- store 'annual_energy_from_electricity_for_other_appliances', :field_name => 'BTUELAPL'
147
- store 'weighting', :field_name => 'NWEIGHT'
148
- store 'total_rooms', :field_name => 'TOTROOMS'
149
- store 'bathrooms', :field_name => 'NCOMBATH'
150
- store 'halfbaths', :field_name => 'NHAFBATH'
151
- store 'heated_garage', :field_name => 'GARGHEAT'
152
- store 'attached_1car_garage', :field_name => 'GARAGE1C'
153
- store 'detached_1car_garage', :field_name => 'DGARG1C'
154
- store 'attached_2car_garage', :field_name => 'GARAGE2C'
155
- store 'detached_2car_garage', :field_name => 'DGARG2C'
156
- store 'attached_3car_garage', :field_name => 'GARAGE3C'
157
- store 'detached_3car_garage', :field_name => 'DGARG3C'
158
- store 'lights_on_1_to_4_hours', :field_name => 'LGT1'
159
- store 'efficient_lights_on_1_to_4_hours', :field_name => 'LGT1EE'
160
- store 'lights_on_4_to_12_hours', :field_name => 'LGT4'
161
- store 'efficient_lights_on_4_to_12_hours', :field_name => 'LGT4EE'
162
- store 'lights_on_over_12_hours', :field_name => 'LGT12'
163
- store 'efficient_lights_on_over_12_hours', :field_name => 'LGT12EE'
164
- store 'outdoor_all_night_lights', :field_name => 'NOUTLGTNT'
165
- store 'outdoor_all_night_gas_lights', :field_name => 'NGASLIGHT'
166
- end
167
-
168
- # Rather than nullify the continuous variables that EIA identifies as LEGITIMATE SKIPS, we convert them to zero
169
- # This makes it easier to derive useful information like "how many rooms does the house have?"
170
- process 'Zero out what the EIA calls "LEGITIMATE SKIPS"' do
171
- %w{
172
- annual_energy_from_electricity_for_air_conditioners
173
- annual_energy_from_electricity_for_clothes_driers
174
- annual_energy_from_electricity_for_dishwashers
175
- annual_energy_from_electricity_for_freezers
176
- annual_energy_from_electricity_for_heating_space
177
- annual_energy_from_electricity_for_heating_water
178
- annual_energy_from_electricity_for_other_appliances
179
- annual_energy_from_electricity_for_refrigerators
180
- annual_energy_from_fuel_oil_for_appliances
181
- annual_energy_from_fuel_oil_for_heating_space
182
- annual_energy_from_fuel_oil_for_heating_water
183
- annual_energy_from_kerosene
184
- annual_energy_from_propane_for_appliances
185
- annual_energy_from_propane_for_heating_space
186
- annual_energy_from_propane_for_heating_water
187
- annual_energy_from_natural_gas_for_appliances
188
- annual_energy_from_natural_gas_for_heating_space
189
- annual_energy_from_natural_gas_for_heating_water
190
- annual_energy_from_wood
191
- lights_on_1_to_4_hours
192
- lights_on_over_12_hours
193
- efficient_lights_on_over_12_hours
194
- efficient_lights_on_1_to_4_hours
195
- lights_on_4_to_12_hours
196
- efficient_lights_on_4_to_12_hours
197
- outdoor_all_night_gas_lights
198
- outdoor_all_night_lights
199
- thermostat_programmability
200
- detached_1car_garage
201
- detached_2car_garage
202
- detached_3car_garage
203
- attached_1car_garage
204
- attached_2car_garage
205
- attached_3car_garage
206
- heated_garage
207
- }.each do |attr_name|
208
- max = maximum attr_name, :select => "CONVERT(#{attr_name}, UNSIGNED INTEGER)"
209
- # if the maximum value of a row is all 999's, then it's a LEGITIMATE SKIP and we should set it to zero
210
- if /^9+$/.match(max.to_i.to_s)
211
- update_all "#{attr_name} = 0", "#{attr_name} = #{max}"
212
- end
213
- end
214
- end
215
-
216
- process 'Convert units to metric after zeroing out LEGITIMATE SKIPS' do
217
- [
218
- [ 'floorspace', :square_feet, :square_metres ],
219
- [ 'annual_energy_from_fuel_oil_for_heating_space', :kbtus, :joules ],
220
- [ 'annual_energy_from_fuel_oil_for_heating_water', :kbtus, :joules ],
221
- [ 'annual_energy_from_fuel_oil_for_appliances', :kbtus, :joules ],
222
- [ 'annual_energy_from_natural_gas_for_heating_space', :kbtus, :joules ],
223
- [ 'annual_energy_from_natural_gas_for_heating_water', :kbtus, :joules ],
224
- [ 'annual_energy_from_natural_gas_for_appliances', :kbtus, :joules ],
225
- [ 'annual_energy_from_propane_for_heating_space', :kbtus, :joules ],
226
- [ 'annual_energy_from_propane_for_heating_water', :kbtus, :joules ],
227
- [ 'annual_energy_from_propane_for_appliances', :kbtus, :joules ],
228
- [ 'annual_energy_from_wood', :kbtus, :joules ],
229
- [ 'annual_energy_from_kerosene', :kbtus, :joules ],
230
- [ 'annual_energy_from_electricity_for_clothes_driers', :kbtus, :joules ],
231
- [ 'annual_energy_from_electricity_for_dishwashers', :kbtus, :joules ],
232
- [ 'annual_energy_from_electricity_for_freezers', :kbtus, :joules ],
233
- [ 'annual_energy_from_electricity_for_refrigerators', :kbtus, :joules ],
234
- [ 'annual_energy_from_electricity_for_air_conditioners', :kbtus, :joules ],
235
- [ 'annual_energy_from_electricity_for_heating_space', :kbtus, :joules ],
236
- [ 'annual_energy_from_electricity_for_heating_water', :kbtus, :joules ],
237
- [ 'annual_energy_from_electricity_for_other_appliances', :kbtus, :joules ],
238
- ].each do |attr_name, from_units, to_units|
239
- update_all "#{attr_name} = #{attr_name} * #{Conversions::Unit.exchange_rate from_units, to_units}"
240
- end
241
- end
242
-
243
- process 'Add a new field "rooms" that estimates how many rooms are in the house' do
244
- update_all 'rooms = total_rooms + bathrooms/2 + halfbaths/4 + heated_garage*(attached_1car_garage + detached_1car_garage + 2*(attached_2car_garage + detached_2car_garage) + 3*(attached_3car_garage + detached_3car_garage))'
245
- end
246
-
247
- process 'Add a new field "lighting_use" that estimates how many hours light bulbs are turned on in the house' do
248
- update_all 'lighting_use = 2*(lights_on_1_to_4_hours + efficient_lights_on_1_to_4_hours) + 8*(lights_on_4_to_12_hours + efficient_lights_on_4_to_12_hours) + 16*(lights_on_over_12_hours + efficient_lights_on_over_12_hours) + 12*(outdoor_all_night_lights + outdoor_all_night_gas_lights)'
249
- end
250
-
251
- process 'Add a new field "lighting_efficiency" that estimates what percentage of light bulbs in a house are energy-efficient' do
252
- update_all 'lighting_efficiency = (2*efficient_lights_on_1_to_4_hours + 8*efficient_lights_on_4_to_12_hours + 16*efficient_lights_on_over_12_hours) / lighting_use'
253
- end
254
- end
255
- end
256
-
257
- # T-100 Segment (All Carriers): http://www.transtats.bts.gov/Fields.asp?Table_ID=293
258
- class T100FlightSegment < ActiveRecord::Base
259
- set_primary_key :row_hash
260
- URL = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
261
- FORM_DATA = %{
262
- UserTableName=T_100_Segment__All_Carriers&
263
- DBShortName=Air_Carriers&
264
- RawDataTable=T_T100_SEGMENT_ALL_CARRIER&
265
- sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D__MONTH_NUMBER__+AND+YEAR%3D__YEAR__&
266
- varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&
267
- grouplist=&
268
- suml=&
269
- sumRegion=&
270
- filter1=title%3D&
271
- filter2=title%3D&
272
- geo=All%A0&
273
- time=__MONTH_NAME__&
274
- timename=Month&
275
- GEOGRAPHY=All&
276
- XYEAR=__YEAR__&
277
- FREQUENCY=__MONTH_NUMBER__&
278
- AllVars=All&
279
- VarName=DEPARTURES_SCHEDULED&
280
- VarDesc=DepScheduled&
281
- VarType=Num&
282
- VarName=DEPARTURES_PERFORMED&
283
- VarDesc=DepPerformed&
284
- VarType=Num&
285
- VarName=PAYLOAD&
286
- VarDesc=Payload&
287
- VarType=Num&
288
- VarName=SEATS&
289
- VarDesc=Seats&
290
- VarType=Num&
291
- VarName=PASSENGERS&
292
- VarDesc=Passengers&
293
- VarType=Num&
294
- VarName=FREIGHT&
295
- VarDesc=Freight&
296
- VarType=Num&
297
- VarName=MAIL&
298
- VarDesc=Mail&
299
- VarType=Num&
300
- VarName=DISTANCE&
301
- VarDesc=Distance&
302
- VarType=Num&
303
- VarName=RAMP_TO_RAMP&
304
- VarDesc=RampToRamp&
305
- VarType=Num&
306
- VarName=AIR_TIME&
307
- VarDesc=AirTime&
308
- VarType=Num&
309
- VarName=UNIQUE_CARRIER&
310
- VarDesc=UniqueCarrier&
311
- VarType=Char&
312
- VarName=AIRLINE_ID&
313
- VarDesc=AirlineID&
314
- VarType=Num&
315
- VarName=UNIQUE_CARRIER_NAME&
316
- VarDesc=UniqueCarrierName&
317
- VarType=Char&
318
- VarName=UNIQUE_CARRIER_ENTITY&
319
- VarDesc=UniqCarrierEntity&
320
- VarType=Char&
321
- VarName=REGION&
322
- VarDesc=CarrierRegion&
323
- VarType=Char&
324
- VarName=CARRIER&
325
- VarDesc=Carrier&
326
- VarType=Char&
327
- VarName=CARRIER_NAME&
328
- VarDesc=CarrierName&
329
- VarType=Char&
330
- VarName=CARRIER_GROUP&
331
- VarDesc=CarrierGroup&
332
- VarType=Num&
333
- VarName=CARRIER_GROUP_NEW&
334
- VarDesc=CarrierGroupNew&
335
- VarType=Num&
336
- VarName=ORIGIN&
337
- VarDesc=Origin&
338
- VarType=Char&
339
- VarName=ORIGIN_CITY_NAME&
340
- VarDesc=OriginCityName&
341
- VarType=Char&
342
- VarName=ORIGIN_CITY_NUM&
343
- VarDesc=OriginCityNum&
344
- VarType=Num&
345
- VarName=ORIGIN_STATE_ABR&
346
- VarDesc=OriginState&
347
- VarType=Char&
348
- VarName=ORIGIN_STATE_FIPS&
349
- VarDesc=OriginStateFips&
350
- VarType=Char&
351
- VarName=ORIGIN_STATE_NM&
352
- VarDesc=OriginStateName&
353
- VarType=Char&
354
- VarName=ORIGIN_COUNTRY&
355
- VarDesc=OriginCountry&
356
- VarType=Char&
357
- VarName=ORIGIN_COUNTRY_NAME&
358
- VarDesc=OriginCountryName&
359
- VarType=Char&
360
- VarName=ORIGIN_WAC&
361
- VarDesc=OriginWac&
362
- VarType=Num&
363
- VarName=DEST&
364
- VarDesc=Dest&
365
- VarType=Char&
366
- VarName=DEST_CITY_NAME&
367
- VarDesc=DestCityName&
368
- VarType=Char&
369
- VarName=DEST_CITY_NUM&
370
- VarDesc=DestCityNum&
371
- VarType=Num&
372
- VarName=DEST_STATE_ABR&
373
- VarDesc=DestState&
374
- VarType=Char&
375
- VarName=DEST_STATE_FIPS&
376
- VarDesc=DestStateFips&
377
- VarType=Char&
378
- VarName=DEST_STATE_NM&
379
- VarDesc=DestStateName&
380
- VarType=Char&
381
- VarName=DEST_COUNTRY&
382
- VarDesc=DestCountry&
383
- VarType=Char&
384
- VarName=DEST_COUNTRY_NAME&
385
- VarDesc=DestCountryName&
386
- VarType=Char&
387
- VarName=DEST_WAC&
388
- VarDesc=DestWac&
389
- VarType=Num&
390
- VarName=AIRCRAFT_GROUP&
391
- VarDesc=AircraftGroup&
392
- VarType=Num&
393
- VarName=AIRCRAFT_TYPE&
394
- VarDesc=AircraftType&
395
- VarType=Char&
396
- VarName=AIRCRAFT_CONFIG&
397
- VarDesc=AircraftConfig&
398
- VarType=Num&
399
- VarName=YEAR&
400
- VarDesc=Year&
401
- VarType=Num&
402
- VarName=QUARTER&
403
- VarDesc=Quarter&
404
- VarType=Num&
405
- VarName=MONTH&
406
- VarDesc=Month&
407
- VarType=Num&
408
- VarName=DISTANCE_GROUP&
409
- VarDesc=DistanceGroup&
410
- VarType=Num&
411
- VarName=CLASS&
412
- VarDesc=Class&
413
- VarType=Char&
414
- VarName=DATA_SOURCE&
415
- VarDesc=DataSource&
416
- VarType=Char
417
- }.gsub /[\s]+/,''
418
-
419
- data_miner do
420
- months = Hash.new
421
- # (2008..2009).each do |year|
422
- (2008..2008).each do |year|
423
- # (1..12).each do |month|
424
- (1..1).each do |month|
425
- time = Time.gm year, month
426
- form_data = FORM_DATA.dup
427
- form_data.gsub! '__YEAR__', time.year.to_s
428
- form_data.gsub! '__MONTH_NUMBER__', time.month.to_s
429
- form_data.gsub! '__MONTH_NAME__', time.strftime('%B')
430
- months[time] = form_data
431
- end
432
- end
433
- months.each do |month, form_data|
434
- import "T100 data from #{month.strftime('%B %Y')}",
435
- :url => URL,
436
- :form_data => form_data,
437
- :compression => :zip,
438
- :glob => '/*.csv' do
439
- key 'row_hash'
440
- store 'departures_scheduled', :field_name => 'DEPARTURES_SCHEDULED'
441
- store 'departures_performed', :field_name => 'DEPARTURES_PERFORMED'
442
- store 'payload', :field_name => 'PAYLOAD', :from_units => :pounds, :to_units => :kilograms
443
- store 'seats', :field_name => 'SEATS'
444
- store 'passengers', :field_name => 'PASSENGERS'
445
- store 'freight', :field_name => 'FREIGHT', :from_units => :pounds, :to_units => :kilograms
446
- store 'mail', :field_name => 'MAIL', :from_units => :pounds, :to_units => :kilograms
447
- store 'distance', :field_name => 'DISTANCE', :from_units => :miles, :to_units => :kilometres
448
- store 'ramp_to_ramp', :field_name => 'RAMP_TO_RAMP'
449
- store 'air_time', :field_name => 'AIR_TIME'
450
- store 'unique_carrier', :field_name => 'UNIQUE_CARRIER'
451
- store 'dot_airline_id', :field_name => 'AIRLINE_ID'
452
- store 'unique_carrier_name', :field_name => 'UNIQUE_CARRIER_NAME'
453
- store 'unique_carrier_entity', :field_name => 'UNIQUE_CARRIER_ENTITY'
454
- store 'region', :field_name => 'REGION'
455
- store 'carrier', :field_name => 'CARRIER'
456
- store 'carrier_name', :field_name => 'CARRIER_NAME'
457
- store 'carrier_group', :field_name => 'CARRIER_GROUP'
458
- store 'carrier_group_new', :field_name => 'CARRIER_GROUP_NEW'
459
- store 'origin_airport_iata', :field_name => 'ORIGIN'
460
- store 'origin_city_name', :field_name => 'ORIGIN_CITY_NAME'
461
- store 'origin_city_num', :field_name => 'ORIGIN_CITY_NUM'
462
- store 'origin_state_abr', :field_name => 'ORIGIN_STATE_ABR'
463
- store 'origin_state_fips', :field_name => 'ORIGIN_STATE_FIPS'
464
- store 'origin_state_nm', :field_name => 'ORIGIN_STATE_NM'
465
- store 'origin_country_iso_3166', :field_name => 'ORIGIN_COUNTRY'
466
- store 'origin_country_name', :field_name => 'ORIGIN_COUNTRY_NAME'
467
- store 'origin_wac', :field_name => 'ORIGIN_WAC'
468
- store 'dest_airport_iata', :field_name => 'DEST'
469
- store 'dest_city_name', :field_name => 'DEST_CITY_NAME'
470
- store 'dest_city_num', :field_name => 'DEST_CITY_NUM'
471
- store 'dest_state_abr', :field_name => 'DEST_STATE_ABR'
472
- store 'dest_state_fips', :field_name => 'DEST_STATE_FIPS'
473
- store 'dest_state_nm', :field_name => 'DEST_STATE_NM'
474
- store 'dest_country_iso_3166', :field_name => 'DEST_COUNTRY'
475
- store 'dest_country_name', :field_name => 'DEST_COUNTRY_NAME'
476
- store 'dest_wac', :field_name => 'DEST_WAC'
477
- store 'bts_aircraft_group', :field_name => 'AIRCRAFT_GROUP'
478
- store 'bts_aircraft_type', :field_name => 'AIRCRAFT_TYPE'
479
- store 'bts_aircraft_config', :field_name => 'AIRCRAFT_CONFIG'
480
- store 'year', :field_name => 'YEAR'
481
- store 'quarter', :field_name => 'QUARTER'
482
- store 'month', :field_name => 'MONTH'
483
- store 'bts_distance_group', :field_name => 'DISTANCE_GROUP'
484
- store 'bts_service_class', :field_name => 'CLASS'
485
- store 'data_source', :field_name => 'DATA_SOURCE'
486
- end
487
- end
488
-
489
- process 'Derive freight share as a fraction of payload' do
490
- update_all 'freight_share = (freight + mail) / payload', 'payload > 0'
491
- end
492
-
493
- process 'Derive load factor, which is passengers divided by the total seats available' do
494
- update_all 'load_factor = passengers / seats', 'passengers <= seats'
495
- end
496
-
497
- process 'Derive average seats per departure' do
498
- update_all 'seats_per_departure = seats / departures_performed', 'departures_performed > 0'
499
- end
500
- end
501
- end
502
-
503
- # note that this depends on stuff in Aircraft
504
- class AircraftDeux < ActiveRecord::Base
505
- set_primary_key :icao_code
506
-
507
- # defined on the class because we defined the errata with a shorthand
508
- class << self
509
- def is_not_attributed_to_aerospatiale?(row)
510
- not row['Manufacturer'] =~ /AEROSPATIALE/i
511
- end
512
-
513
- def is_not_attributed_to_cessna?(row)
514
- not row['Manufacturer'] =~ /CESSNA/i
515
- end
516
-
517
- def is_not_attributed_to_learjet?(row)
518
- not row['Manufacturer'] =~ /LEAR/i
519
- end
520
-
521
- def is_not_attributed_to_dehavilland?(row)
522
- not row['Manufacturer'] =~ /DE ?HAVILLAND/i
523
- end
524
-
525
- def is_not_attributed_to_mcdonnell_douglas?(row)
526
- not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
527
- end
528
-
529
- def is_not_a_dc_plane?(row)
530
- not row['Model'] =~ /DC/i
531
- end
532
-
533
- def is_a_crj_900?(row)
534
- row['Designator'].downcase == 'crj9'
535
- end
536
- end
537
-
538
- data_miner do
539
- # ('A'..'Z').each do |letter|
540
- # Note: for the purposes of testing, only importing "D"
541
- %w{ D }.each do |letter|
542
- import("ICAO codes starting with letter #{letter} used by the FAA",
543
- :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
544
- :encoding => 'windows-1252',
545
- :errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw' },
546
- :row_xpath => '//table/tr[2]/td/table/tr',
547
- :column_xpath => 'td') do
548
- key 'icao_code', :field_name => 'Designator'
549
- store 'bts_name', :matcher => Aircraft::BtsNameMatcher.new
550
- store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher.new
551
- store 'manufacturer_name', :field_name => 'Manufacturer'
552
- store 'name', :field_name => 'Model'
553
- end
554
- end
555
- end
556
- end
557
-
558
- class AutomobileMakeFleetYear < ActiveRecord::Base
559
- set_primary_key :name
560
-
561
- col :name
562
- col :make_name
563
- col :fleet
564
- col :year, :type => :integer
565
- col :fuel_efficiency, :type => :float
566
- col :fuel_efficiency_units
567
- col :volume, :type => :integer
568
- col :make_year_name
569
- col :created_at, :type => :datetime
570
- col :updated_at, :type => :datetime
571
-
572
- data_miner do
573
- process :auto_upgrade!
574
-
575
- process "finish if i tell you to" do
576
- raise DataMiner::Finish if $force_finish
577
- end
578
-
579
- process "skip if i tell you to" do
580
- raise DataMiner::Skip if $force_skip
581
- end
582
-
583
- # CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA
584
- import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/make_fleet_years.csv',
585
- :errata => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv' },
586
- :select => lambda { |row| row['volume'].to_i > 0 } do
587
- key 'name', :synthesize => lambda { |row| [ row['manufacturer_name'], row['fleet'][2,2], row['year_content'] ].join ' ' }
588
- store 'make_name', :field_name => 'manufacturer_name'
589
- store 'year', :field_name => 'year_content'
590
- store 'fleet', :chars => 2..3 # zero-based
591
- store 'fuel_efficiency', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
592
- store 'volume'
593
- end
594
- end
595
- end
596
-
597
- class CensusDivisionTrois < ActiveRecord::Base
598
- set_primary_key :number_code
599
-
600
- col :number_code
601
- col :name
602
- col :census_region_name
603
- col :census_region_number, :type => :integer
604
- add_index 'census_region_name', :name => 'homefry'
605
- add_index ['number_code', 'name', 'census_region_name', 'census_region_number']
606
-
607
- data_miner do
608
- process :auto_upgrade!
609
- end
610
- end
611
-
612
- class CensusDivisionFour < ActiveRecord::Base
613
- col :number_code
614
- col :name
615
- col :census_region_name
616
- col :census_region_number, :type => :integer
617
- add_index 'census_region_name', :name => 'homefry'
618
-
619
- data_miner do
620
- process :auto_upgrade!
621
- end
622
- end
623
-
624
- # todo: have somebody properly organize these
625
- class TestOldSyntax < Test::Unit::TestCase
626
- if ENV['WIP']
627
- context 'with nullify option' do
628
- should 'treat blank fields as null values' do
629
- Aircraft.delete_all
630
- Aircraft.data_miner_runs.delete_all
631
- Aircraft.run_data_miner!
632
- assert_greater_than 0, Aircraft.count
633
- assert_false Aircraft.where(:brighter_planet_aircraft_class_code => nil).empty?
634
- end
635
- end
636
- end
637
-
638
- if ENV['ALL'] == 'true'
639
- should 'directly create a table for the model' do
640
- if AutomobileMakeFleetYear.table_exists?
641
- ActiveRecord::Base.connection.execute 'DROP TABLE automobile_make_fleet_years;'
642
- end
643
- AutomobileMakeFleetYear.auto_upgrade!
644
- assert AutomobileMakeFleetYear.table_exists?
645
- end
646
- end
647
-
648
- if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
649
- should 'append to an existing config' do
650
- AutomobileFuelType.class_eval do
651
- data_miner :append => true do
652
- import 'example1', :url => 'http://example1.com' do
653
- key 'code'
654
- store 'name'
655
- end
656
- end
657
- data_miner :append => true do
658
- import 'example2', :url => 'http://example2.com' do
659
- key 'code'
660
- store 'name'
661
- end
662
- end
663
- end
664
- assert_equal 'http://example1.com', AutomobileFuelType.data_miner_config.steps[-2].table.url
665
- assert_equal 'http://example2.com', AutomobileFuelType.data_miner_config.steps[-1].table.url
666
- end
667
-
668
- should 'override an existing data_miner configuration' do
669
- AutomobileFuelType.class_eval do
670
- data_miner do
671
- import 'example', :url => 'http://example.com' do
672
- key 'code'
673
- store 'name'
674
- end
675
- end
676
- end
677
- assert_kind_of DataMiner::Import, AutomobileFuelType.data_miner_config.steps.first
678
- assert_equal 'http://example.com', AutomobileFuelType.data_miner_config.steps.first.table.url
679
- end
680
- should "stop and finish if it gets a DataMiner::Finish" do
681
- AutomobileMakeFleetYear.delete_all
682
- AutomobileMakeFleetYear.data_miner_runs.delete_all
683
- $force_finish = true
684
- AutomobileMakeFleetYear.run_data_miner!
685
- assert_equal 0, AutomobileMakeFleetYear.count
686
- assert (AutomobileMakeFleetYear.data_miner_runs.count > 0)
687
- assert AutomobileMakeFleetYear.data_miner_runs.all? { |run| run.finished? and not run.skipped and not run.killed? }
688
- $force_finish = false
689
- AutomobileMakeFleetYear.run_data_miner!
690
- assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
691
- end
692
-
693
- should "stop and register skipped if it gets a DataMiner::Skip" do
694
- AutomobileMakeFleetYear.delete_all
695
- AutomobileMakeFleetYear.data_miner_runs.delete_all
696
- $force_skip = true
697
- AutomobileMakeFleetYear.run_data_miner!
698
- assert_equal 0, AutomobileMakeFleetYear.count
699
- assert (AutomobileMakeFleetYear.data_miner_runs.count > 0)
700
- assert AutomobileMakeFleetYear.data_miner_runs.all? { |run| run.skipped? and not run.finished? and not run.killed? }
701
- $force_skip = false
702
- AutomobileMakeFleetYear.run_data_miner!
703
- assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
704
- end
705
-
706
- should "allow specifying dictionaries explicitly" do
707
- CensusDivisionDeux.run_data_miner!
708
- assert_equal 'South Region', CensusDivisionDeux.find(5).census_region_name
709
- end
710
-
711
- should "be able to key on things other than the primary key" do
712
- Aircraft.run_data_miner!
713
- assert_equal 'SP', Aircraft.find('DHC6').brighter_planet_aircraft_class_code
714
- end
715
-
716
- should "be able to synthesize rows without using a full parser class" do
717
- AutomobileMakeFleetYear.run_data_miner!
718
- assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
719
- end
720
-
721
- should "keep a call stack so that you can call run_data_miner! on a child" do
722
- CrosscallingCensusDivision.run_data_miner!
723
- assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
724
- assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
725
- end
726
-
727
- should "keep a call stack so that you can call run_data_miner! on a parent" do
728
- CrosscallingCensusRegion.run_data_miner!
729
- assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
730
- assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
731
- end
732
-
733
- should "import airports" do
734
- Airport.run_data_miner!
735
- assert Airport.count > 0
736
- end
737
-
738
- should "pull in census divisions using a data.brighterplanet.com dictionary" do
739
- CensusDivision.run_data_miner!
740
- assert CensusDivision.count > 0
741
- end
742
-
743
- should "have a way to queue up runs that works with delated_job's send_later" do
744
- assert AutomobileVariant.respond_to?(:run_data_miner!)
745
- end
746
-
747
- should "be idempotent" do
748
- Country.data_miner_config.run
749
- a = Country.count
750
- Country.data_miner_config.run
751
- b = Country.count
752
- assert_equal a, b
753
-
754
- CensusRegion.data_miner_config.run
755
- a = CensusRegion.count
756
- CensusRegion.data_miner_config.run
757
- b = CensusRegion.count
758
- assert_equal a, b
759
- end
760
-
761
- should "hash things" do
762
- AutomobileVariant.data_miner_config.steps[0].run
763
- assert AutomobileVariant.first.row_hash.present?
764
- end
765
-
766
- should "process a callback block instead of a method" do
767
- AutomobileVariant.delete_all
768
- AutomobileVariant.data_miner_config.steps[0].run
769
- assert !AutomobileVariant.first.fuel_efficiency_city.present?
770
- AutomobileVariant.data_miner_config.steps.last.run
771
- assert AutomobileVariant.first.fuel_efficiency_city.present?
772
- end
773
-
774
- should "keep a log when it does a run" do
775
- approx_started_at = Time.now
776
- DataMiner.run :resource_names => %w{ Country }
777
- approx_terminated_at = Time.now
778
- last_run = DataMiner::Run.first(:conditions => { :resource_name => 'Country' }, :order => 'id DESC')
779
- assert (last_run.started_at - approx_started_at).abs < 5 # seconds
780
- assert (last_run.terminated_at - approx_terminated_at).abs < 5 # seconds
781
- end
782
-
783
- should "request a re-import from scratch" do
784
- c = Country.new
785
- c.iso_3166 = 'JUNK'
786
- c.save!
787
- assert Country.exists?(:iso_3166 => 'JUNK')
788
- DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
789
- assert !Country.exists?(:iso_3166 => 'JUNK')
790
- end
791
-
792
- should "know what runs were on a resource" do
793
- DataMiner.run :resource_names => %w{ Country }
794
- DataMiner.run :resource_names => %w{ Country }
795
- assert Country.data_miner_runs.count > 0
796
- end
797
- end
798
-
799
- if ENV['ALL'] == 'true' or ENV['SLOW'] == 'true'
800
- should "allow errata to be specified with a shorthand, assuming the responder is the resource class itself" do
801
- AircraftDeux.run_data_miner!
802
- assert AircraftDeux.exists? :icao_code => 'DC91', :bts_aircraft_type_code => '630'
803
- end
804
-
805
- should "mine aircraft" do
806
- Aircraft.run_data_miner!
807
- assert Aircraft.exists? :icao_code => 'DC91', :bts_aircraft_type_code => '630'
808
- end
809
-
810
- should "mine automobile variants" do
811
- AutomobileVariant.run_data_miner!
812
- assert AutomobileVariant.count('make_name LIKE "%tesla"') > 0
813
- end
814
-
815
- should "mine T100 flight segments" do
816
- T100FlightSegment.run_data_miner!
817
- assert T100FlightSegment.count('dest_country_name LIKE "%United States"') > 0
818
- end
819
-
820
- should "mine residence survey responses" do
821
- ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
822
- assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.start_with?('Single-family detached house')
823
- end
824
- end
825
- end