data_miner 1.3.8 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. data/CHANGELOG +42 -0
  2. data/Gemfile +19 -3
  3. data/README.rdoc +3 -3
  4. data/Rakefile +13 -15
  5. data/data_miner.gemspec +4 -15
  6. data/lib/data_miner.rb +69 -70
  7. data/lib/data_miner/active_record_extensions.rb +17 -22
  8. data/lib/data_miner/attribute.rb +176 -179
  9. data/lib/data_miner/dictionary.rb +38 -31
  10. data/lib/data_miner/run.rb +49 -18
  11. data/lib/data_miner/script.rb +116 -0
  12. data/lib/data_miner/step.rb +5 -0
  13. data/lib/data_miner/step/import.rb +74 -0
  14. data/lib/data_miner/step/process.rb +34 -0
  15. data/lib/data_miner/step/tap.rb +134 -0
  16. data/lib/data_miner/version.rb +1 -1
  17. data/test/helper.rb +26 -24
  18. data/test/support/breeds.xls +0 -0
  19. data/test/support/pet_color_dictionary.en.csv +5 -0
  20. data/test/support/pet_color_dictionary.es.csv +5 -0
  21. data/test/support/pets.csv +5 -0
  22. data/test/support/pets_funny.csv +4 -0
  23. data/test/test_data_miner.rb +103 -0
  24. data/test/test_earth_import.rb +25 -0
  25. data/test/test_earth_tap.rb +25 -0
  26. data/test/test_safety.rb +43 -0
  27. metadata +72 -78
  28. data/.document +0 -5
  29. data/lib/data_miner/config.rb +0 -124
  30. data/lib/data_miner/import.rb +0 -93
  31. data/lib/data_miner/process.rb +0 -38
  32. data/lib/data_miner/tap.rb +0 -143
  33. data/test/support/aircraft.rb +0 -102
  34. data/test/support/airport.rb +0 -16
  35. data/test/support/automobile_fuel_type.rb +0 -40
  36. data/test/support/automobile_variant.rb +0 -362
  37. data/test/support/country.rb +0 -15
  38. data/test/support/test_database.rb +0 -311
  39. data/test/test_data_miner_attribute.rb +0 -111
  40. data/test/test_data_miner_process.rb +0 -18
  41. data/test/test_old_syntax.rb +0 -825
  42. data/test/test_tap.rb +0 -21
@@ -1,111 +0,0 @@
1
- $:.push File.dirname(__FILE__)
2
- require 'helper'
3
-
4
- TestDatabase.load_models
5
-
6
- class TestDataMinerAttribute < Test::Unit::TestCase
7
- context '#value_from_row' do
8
- setup do
9
- @airport = Airport.new
10
- end
11
- context 'nullify is true' do
12
- setup do
13
- @attribute = DataMiner::Attribute.new @airport, 'latitude', :nullify => true
14
- end
15
- should 'return nil if field is blank' do
16
- assert_nil @attribute.value_from_row(
17
- 'name' => 'DTW',
18
- 'city' => 'Warren',
19
- 'country_name' => 'US',
20
- 'latitude' => '',
21
- 'longitude' => ''
22
- )
23
- end
24
- should 'return the value if field is not blank' do
25
- assert_equal '12.34', @attribute.value_from_row(
26
- 'name' => 'DTW',
27
- 'city' => 'Warren',
28
- 'country_name' => 'US',
29
- 'latitude' => '12.34',
30
- 'longitude' => ''
31
- )
32
- end
33
- end
34
- context 'nullify is false' do
35
- setup do
36
- @attribute = DataMiner::Attribute.new @airport, 'latitude'
37
- end
38
- should 'return the value if field is not blank' do
39
- assert_equal '12.34', @attribute.value_from_row(
40
- 'name' => 'DTW',
41
- 'city' => 'Warren',
42
- 'country_name' => 'US',
43
- 'latitude' => '12.34',
44
- 'longitude' => ''
45
- )
46
- end
47
- should 'return blank if field is blank' do
48
- assert_equal '', @attribute.value_from_row(
49
- 'name' => 'DTW',
50
- 'city' => 'Warren',
51
- 'country_name' => 'US',
52
- 'latitude' => '',
53
- 'longitude' => ''
54
- )
55
- end
56
- end
57
- end
58
-
59
- context '#set_record_from_row' do
60
- setup do
61
- @automobile_fuel_type = AutomobileFuelType.new
62
- end
63
- context 'nullify is true, wants units' do
64
- setup do
65
- @attribute = DataMiner::Attribute.new @automobile_fuel_type, 'annual_distance', :nullify => true, :units_field_name => 'annual_distance_units'
66
- end
67
- should 'set value and units to nil if field is blank' do
68
- @attribute.set_record_from_row(@automobile_fuel_type,
69
- 'name' => 'electricity',
70
- 'annual_distance' => '',
71
- 'annual_distance_units' => ''
72
- )
73
- assert_nil @automobile_fuel_type.annual_distance
74
- assert_nil @automobile_fuel_type.annual_distance_units
75
- end
76
- should 'set value and units if field is not blank' do
77
- @attribute.set_record_from_row(@automobile_fuel_type,
78
- 'name' => 'electricity',
79
- 'annual_distance' => '100.0',
80
- 'annual_distance_units' => 'kilometres'
81
- )
82
- assert_equal 100.0, @automobile_fuel_type.annual_distance
83
- assert_equal 'kilometres', @automobile_fuel_type.annual_distance_units
84
- end
85
- end
86
-
87
- context 'nullify is false, wants units' do
88
- setup do
89
- @attribute = DataMiner::Attribute.new @automobile_fuel_type, 'annual_distance', :units_field_name => 'annual_distance_units'
90
- end
91
- should 'set value and units to blank if field is blank' do
92
- @attribute.set_record_from_row(@automobile_fuel_type,
93
- 'name' => 'electricity',
94
- 'annual_distance' => '',
95
- 'annual_distance_units' => ''
96
- )
97
- assert_equal 0.0, @automobile_fuel_type.annual_distance
98
- assert_equal '', @automobile_fuel_type.annual_distance_units
99
- end
100
- should 'set value and units if field is not blank' do
101
- @attribute.set_record_from_row(@automobile_fuel_type,
102
- 'name' => 'electricity',
103
- 'annual_distance' => '100.0',
104
- 'annual_distance_units' => 'kilometres'
105
- )
106
- assert_equal 100.0, @automobile_fuel_type.annual_distance
107
- assert_equal 'kilometres', @automobile_fuel_type.annual_distance_units
108
- end
109
- end
110
- end
111
- end
@@ -1,18 +0,0 @@
1
- $:.push File.dirname(__FILE__)
2
- require 'helper'
3
-
4
- class TestDataMinerProcess < Test::Unit::TestCase
5
- context '#inspect' do
6
- setup do
7
- @config = DataMiner::Config.new 'foo'
8
- end
9
- should 'describe a block' do
10
- process = DataMiner::Process.new(@config, 'something cool') { }
11
- assert_match /something cool/, process.inspect
12
- end
13
- should 'describe a method' do
14
- process = DataMiner::Process.new @config, :something_cool
15
- assert_match /something_cool/, process.inspect
16
- end
17
- end
18
- end
@@ -1,825 +0,0 @@
1
- $:.push File.dirname(__FILE__)
2
- require 'helper'
3
-
4
- TestDatabase.load_models
5
-
6
- class CensusRegion < ActiveRecord::Base
7
- set_primary_key :number
8
-
9
- data_miner do
10
- import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
11
- key 'number', :field_name => 'Region'
12
- store 'name', :field_name => 'Name'
13
- end
14
-
15
- # pretend this is a different data source
16
- # fake! just for testing purposes
17
- import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
18
- key 'number', :field_name => 'Region'
19
- store 'name', :field_name => 'Name'
20
- end
21
- end
22
- end
23
-
24
- # smaller than a region
25
- class CensusDivision < ActiveRecord::Base
26
- set_primary_key :number
27
-
28
- data_miner do
29
- import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
30
- key 'number', :field_name => 'Division'
31
- store 'name', :field_name => 'Name'
32
- store 'census_region_number', :field_name => 'Region'
33
- store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
34
- end
35
- end
36
- end
37
-
38
- class CensusDivisionDeux < ActiveRecord::Base
39
- set_primary_key :number
40
-
41
- data_miner do
42
- import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
43
- key 'number', :field_name => 'Division'
44
- store 'name', :field_name => 'Name'
45
- store 'census_region_number', :field_name => 'Region'
46
- store 'census_region_name', :field_name => 'Region', :dictionary => DataMiner::Dictionary.new(:input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv')
47
- end
48
- end
49
- end
50
-
51
- class CrosscallingCensusRegion < ActiveRecord::Base
52
- set_primary_key :number
53
-
54
- has_many :crosscalling_census_divisions
55
-
56
- data_miner do
57
- process "derive ourselves from the census divisions table (i.e., cross call census divisions)" do
58
- CrosscallingCensusDivision.run_data_miner!
59
- connection.create_table :crosscalling_census_regions, :options => 'ENGINE=InnoDB default charset=utf8', :id => false, :force => true do |t|
60
- t.column :number, :integer
61
- t.column :name, :string
62
- end
63
- connection.execute 'ALTER TABLE crosscalling_census_regions ADD PRIMARY KEY (number);'
64
- connection.execute %{
65
- INSERT IGNORE INTO crosscalling_census_regions(number, name)
66
- SELECT crosscalling_census_divisions.census_region_number, crosscalling_census_divisions.census_region_name FROM crosscalling_census_divisions
67
- }
68
- end
69
- end
70
- end
71
-
72
- class CrosscallingCensusDivision < ActiveRecord::Base
73
- set_primary_key :number
74
-
75
- belongs_to :crosscalling_census_regions, :foreign_key => 'census_region_number'
76
-
77
- data_miner do
78
- import "get a list of census divisions and their regions", :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
79
- key 'number', :field_name => 'Division'
80
- store 'name', :field_name => 'Name'
81
- store 'census_region_number', :field_name => 'Region'
82
- store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
83
- end
84
-
85
- process "make sure my parent object is set up (i.e., cross-call it)" do
86
- CrosscallingCensusRegion.run_data_miner!
87
- end
88
- end
89
- end
90
-
91
- class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
92
- set_primary_key :department_of_energy_identifier
93
-
94
- data_miner do
95
- process 'Define some unit conversions' do
96
- Conversions.register :kbtus, :joules, 1_000.0 * 1_055.05585
97
- Conversions.register :square_feet, :square_metres, 0.09290304
98
- end
99
-
100
- # conversions are NOT performed here, since we first have to zero out legitimate skips
101
- # otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
102
- import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv' do
103
- key 'department_of_energy_identifier', :field_name => 'DOEID'
104
-
105
- store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
106
- store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
107
- store 'construction_period', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
108
- store 'urbanity', :field_name => 'URBRUR', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/urbrur/urbrur.csv' }
109
- store 'dishwasher_use', :field_name => 'DWASHUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dwashuse/dwashuse.csv' }
110
- store 'central_ac_use', :field_name => 'USECENAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usecenac/usecenac.csv' }
111
- store 'window_ac_use', :field_name => 'USEWWAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usewwac/usewwac.csv' }
112
- store 'clothes_washer_use', :field_name => 'WASHLOAD', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/washload/washload.csv' }
113
- store 'clothes_dryer_use', :field_name => 'DRYRUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dryruse/dryruse.csv' }
114
-
115
- store 'census_division_number', :field_name => 'DIVISION'
116
- store 'census_division_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
117
- store 'census_region_number', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_number', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
118
- store 'census_region_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
119
-
120
- store 'floorspace', :field_name => 'TOTSQFT'
121
- store 'residents', :field_name => 'NHSLDMEM'
122
- store 'ownership', :field_name => 'KOWNRENT'
123
- store 'thermostat_programmability', :field_name => 'PROTHERM'
124
- store 'refrigerator_count', :field_name => 'NUMFRIG'
125
- store 'freezer_count', :field_name => 'NUMFREEZ'
126
- store 'heating_degree_days', :field_name => 'HD65'
127
- store 'cooling_degree_days', :field_name => 'CD65'
128
- store 'annual_energy_from_fuel_oil_for_heating_space', :field_name => 'BTUFOSPH'
129
- store 'annual_energy_from_fuel_oil_for_heating_water', :field_name => 'BTUFOWTH'
130
- store 'annual_energy_from_fuel_oil_for_appliances', :field_name => 'BTUFOAPL'
131
- store 'annual_energy_from_natural_gas_for_heating_space', :field_name => 'BTUNGSPH'
132
- store 'annual_energy_from_natural_gas_for_heating_water', :field_name => 'BTUNGWTH'
133
- store 'annual_energy_from_natural_gas_for_appliances', :field_name => 'BTUNGAPL'
134
- store 'annual_energy_from_propane_for_heating_space', :field_name => 'BTULPSPH'
135
- store 'annual_energy_from_propane_for_heating_water', :field_name => 'BTULPWTH'
136
- store 'annual_energy_from_propane_for_appliances', :field_name => 'BTULPAPL'
137
- store 'annual_energy_from_wood', :field_name => 'BTUWOOD'
138
- store 'annual_energy_from_kerosene', :field_name => 'BTUKER'
139
- store 'annual_energy_from_electricity_for_clothes_driers', :field_name => 'BTUELCDR'
140
- store 'annual_energy_from_electricity_for_dishwashers', :field_name => 'BTUELDWH'
141
- store 'annual_energy_from_electricity_for_freezers', :field_name => 'BTUELFZZ'
142
- store 'annual_energy_from_electricity_for_refrigerators', :field_name => 'BTUELRFG'
143
- store 'annual_energy_from_electricity_for_air_conditioners', :field_name => 'BTUELCOL'
144
- store 'annual_energy_from_electricity_for_heating_space', :field_name => 'BTUELSPH'
145
- store 'annual_energy_from_electricity_for_heating_water', :field_name => 'BTUELWTH'
146
- store 'annual_energy_from_electricity_for_other_appliances', :field_name => 'BTUELAPL'
147
- store 'weighting', :field_name => 'NWEIGHT'
148
- store 'total_rooms', :field_name => 'TOTROOMS'
149
- store 'bathrooms', :field_name => 'NCOMBATH'
150
- store 'halfbaths', :field_name => 'NHAFBATH'
151
- store 'heated_garage', :field_name => 'GARGHEAT'
152
- store 'attached_1car_garage', :field_name => 'GARAGE1C'
153
- store 'detached_1car_garage', :field_name => 'DGARG1C'
154
- store 'attached_2car_garage', :field_name => 'GARAGE2C'
155
- store 'detached_2car_garage', :field_name => 'DGARG2C'
156
- store 'attached_3car_garage', :field_name => 'GARAGE3C'
157
- store 'detached_3car_garage', :field_name => 'DGARG3C'
158
- store 'lights_on_1_to_4_hours', :field_name => 'LGT1'
159
- store 'efficient_lights_on_1_to_4_hours', :field_name => 'LGT1EE'
160
- store 'lights_on_4_to_12_hours', :field_name => 'LGT4'
161
- store 'efficient_lights_on_4_to_12_hours', :field_name => 'LGT4EE'
162
- store 'lights_on_over_12_hours', :field_name => 'LGT12'
163
- store 'efficient_lights_on_over_12_hours', :field_name => 'LGT12EE'
164
- store 'outdoor_all_night_lights', :field_name => 'NOUTLGTNT'
165
- store 'outdoor_all_night_gas_lights', :field_name => 'NGASLIGHT'
166
- end
167
-
168
- # Rather than nullify the continuous variables that EIA identifies as LEGITIMATE SKIPS, we convert them to zero
169
- # This makes it easier to derive useful information like "how many rooms does the house have?"
170
- process 'Zero out what the EIA calls "LEGITIMATE SKIPS"' do
171
- %w{
172
- annual_energy_from_electricity_for_air_conditioners
173
- annual_energy_from_electricity_for_clothes_driers
174
- annual_energy_from_electricity_for_dishwashers
175
- annual_energy_from_electricity_for_freezers
176
- annual_energy_from_electricity_for_heating_space
177
- annual_energy_from_electricity_for_heating_water
178
- annual_energy_from_electricity_for_other_appliances
179
- annual_energy_from_electricity_for_refrigerators
180
- annual_energy_from_fuel_oil_for_appliances
181
- annual_energy_from_fuel_oil_for_heating_space
182
- annual_energy_from_fuel_oil_for_heating_water
183
- annual_energy_from_kerosene
184
- annual_energy_from_propane_for_appliances
185
- annual_energy_from_propane_for_heating_space
186
- annual_energy_from_propane_for_heating_water
187
- annual_energy_from_natural_gas_for_appliances
188
- annual_energy_from_natural_gas_for_heating_space
189
- annual_energy_from_natural_gas_for_heating_water
190
- annual_energy_from_wood
191
- lights_on_1_to_4_hours
192
- lights_on_over_12_hours
193
- efficient_lights_on_over_12_hours
194
- efficient_lights_on_1_to_4_hours
195
- lights_on_4_to_12_hours
196
- efficient_lights_on_4_to_12_hours
197
- outdoor_all_night_gas_lights
198
- outdoor_all_night_lights
199
- thermostat_programmability
200
- detached_1car_garage
201
- detached_2car_garage
202
- detached_3car_garage
203
- attached_1car_garage
204
- attached_2car_garage
205
- attached_3car_garage
206
- heated_garage
207
- }.each do |attr_name|
208
- max = maximum attr_name, :select => "CONVERT(#{attr_name}, UNSIGNED INTEGER)"
209
- # if the maximum value of a row is all 999's, then it's a LEGITIMATE SKIP and we should set it to zero
210
- if /^9+$/.match(max.to_i.to_s)
211
- update_all "#{attr_name} = 0", "#{attr_name} = #{max}"
212
- end
213
- end
214
- end
215
-
216
- process 'Convert units to metric after zeroing out LEGITIMATE SKIPS' do
217
- [
218
- [ 'floorspace', :square_feet, :square_metres ],
219
- [ 'annual_energy_from_fuel_oil_for_heating_space', :kbtus, :joules ],
220
- [ 'annual_energy_from_fuel_oil_for_heating_water', :kbtus, :joules ],
221
- [ 'annual_energy_from_fuel_oil_for_appliances', :kbtus, :joules ],
222
- [ 'annual_energy_from_natural_gas_for_heating_space', :kbtus, :joules ],
223
- [ 'annual_energy_from_natural_gas_for_heating_water', :kbtus, :joules ],
224
- [ 'annual_energy_from_natural_gas_for_appliances', :kbtus, :joules ],
225
- [ 'annual_energy_from_propane_for_heating_space', :kbtus, :joules ],
226
- [ 'annual_energy_from_propane_for_heating_water', :kbtus, :joules ],
227
- [ 'annual_energy_from_propane_for_appliances', :kbtus, :joules ],
228
- [ 'annual_energy_from_wood', :kbtus, :joules ],
229
- [ 'annual_energy_from_kerosene', :kbtus, :joules ],
230
- [ 'annual_energy_from_electricity_for_clothes_driers', :kbtus, :joules ],
231
- [ 'annual_energy_from_electricity_for_dishwashers', :kbtus, :joules ],
232
- [ 'annual_energy_from_electricity_for_freezers', :kbtus, :joules ],
233
- [ 'annual_energy_from_electricity_for_refrigerators', :kbtus, :joules ],
234
- [ 'annual_energy_from_electricity_for_air_conditioners', :kbtus, :joules ],
235
- [ 'annual_energy_from_electricity_for_heating_space', :kbtus, :joules ],
236
- [ 'annual_energy_from_electricity_for_heating_water', :kbtus, :joules ],
237
- [ 'annual_energy_from_electricity_for_other_appliances', :kbtus, :joules ],
238
- ].each do |attr_name, from_units, to_units|
239
- update_all "#{attr_name} = #{attr_name} * #{Conversions::Unit.exchange_rate from_units, to_units}"
240
- end
241
- end
242
-
243
- process 'Add a new field "rooms" that estimates how many rooms are in the house' do
244
- update_all 'rooms = total_rooms + bathrooms/2 + halfbaths/4 + heated_garage*(attached_1car_garage + detached_1car_garage + 2*(attached_2car_garage + detached_2car_garage) + 3*(attached_3car_garage + detached_3car_garage))'
245
- end
246
-
247
- process 'Add a new field "lighting_use" that estimates how many hours light bulbs are turned on in the house' do
248
- update_all 'lighting_use = 2*(lights_on_1_to_4_hours + efficient_lights_on_1_to_4_hours) + 8*(lights_on_4_to_12_hours + efficient_lights_on_4_to_12_hours) + 16*(lights_on_over_12_hours + efficient_lights_on_over_12_hours) + 12*(outdoor_all_night_lights + outdoor_all_night_gas_lights)'
249
- end
250
-
251
- process 'Add a new field "lighting_efficiency" that estimates what percentage of light bulbs in a house are energy-efficient' do
252
- update_all 'lighting_efficiency = (2*efficient_lights_on_1_to_4_hours + 8*efficient_lights_on_4_to_12_hours + 16*efficient_lights_on_over_12_hours) / lighting_use'
253
- end
254
- end
255
- end
256
-
257
- # T-100 Segment (All Carriers): http://www.transtats.bts.gov/Fields.asp?Table_ID=293
258
- class T100FlightSegment < ActiveRecord::Base
259
- set_primary_key :row_hash
260
- URL = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
261
- FORM_DATA = %{
262
- UserTableName=T_100_Segment__All_Carriers&
263
- DBShortName=Air_Carriers&
264
- RawDataTable=T_T100_SEGMENT_ALL_CARRIER&
265
- sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D__MONTH_NUMBER__+AND+YEAR%3D__YEAR__&
266
- varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&
267
- grouplist=&
268
- suml=&
269
- sumRegion=&
270
- filter1=title%3D&
271
- filter2=title%3D&
272
- geo=All%A0&
273
- time=__MONTH_NAME__&
274
- timename=Month&
275
- GEOGRAPHY=All&
276
- XYEAR=__YEAR__&
277
- FREQUENCY=__MONTH_NUMBER__&
278
- AllVars=All&
279
- VarName=DEPARTURES_SCHEDULED&
280
- VarDesc=DepScheduled&
281
- VarType=Num&
282
- VarName=DEPARTURES_PERFORMED&
283
- VarDesc=DepPerformed&
284
- VarType=Num&
285
- VarName=PAYLOAD&
286
- VarDesc=Payload&
287
- VarType=Num&
288
- VarName=SEATS&
289
- VarDesc=Seats&
290
- VarType=Num&
291
- VarName=PASSENGERS&
292
- VarDesc=Passengers&
293
- VarType=Num&
294
- VarName=FREIGHT&
295
- VarDesc=Freight&
296
- VarType=Num&
297
- VarName=MAIL&
298
- VarDesc=Mail&
299
- VarType=Num&
300
- VarName=DISTANCE&
301
- VarDesc=Distance&
302
- VarType=Num&
303
- VarName=RAMP_TO_RAMP&
304
- VarDesc=RampToRamp&
305
- VarType=Num&
306
- VarName=AIR_TIME&
307
- VarDesc=AirTime&
308
- VarType=Num&
309
- VarName=UNIQUE_CARRIER&
310
- VarDesc=UniqueCarrier&
311
- VarType=Char&
312
- VarName=AIRLINE_ID&
313
- VarDesc=AirlineID&
314
- VarType=Num&
315
- VarName=UNIQUE_CARRIER_NAME&
316
- VarDesc=UniqueCarrierName&
317
- VarType=Char&
318
- VarName=UNIQUE_CARRIER_ENTITY&
319
- VarDesc=UniqCarrierEntity&
320
- VarType=Char&
321
- VarName=REGION&
322
- VarDesc=CarrierRegion&
323
- VarType=Char&
324
- VarName=CARRIER&
325
- VarDesc=Carrier&
326
- VarType=Char&
327
- VarName=CARRIER_NAME&
328
- VarDesc=CarrierName&
329
- VarType=Char&
330
- VarName=CARRIER_GROUP&
331
- VarDesc=CarrierGroup&
332
- VarType=Num&
333
- VarName=CARRIER_GROUP_NEW&
334
- VarDesc=CarrierGroupNew&
335
- VarType=Num&
336
- VarName=ORIGIN&
337
- VarDesc=Origin&
338
- VarType=Char&
339
- VarName=ORIGIN_CITY_NAME&
340
- VarDesc=OriginCityName&
341
- VarType=Char&
342
- VarName=ORIGIN_CITY_NUM&
343
- VarDesc=OriginCityNum&
344
- VarType=Num&
345
- VarName=ORIGIN_STATE_ABR&
346
- VarDesc=OriginState&
347
- VarType=Char&
348
- VarName=ORIGIN_STATE_FIPS&
349
- VarDesc=OriginStateFips&
350
- VarType=Char&
351
- VarName=ORIGIN_STATE_NM&
352
- VarDesc=OriginStateName&
353
- VarType=Char&
354
- VarName=ORIGIN_COUNTRY&
355
- VarDesc=OriginCountry&
356
- VarType=Char&
357
- VarName=ORIGIN_COUNTRY_NAME&
358
- VarDesc=OriginCountryName&
359
- VarType=Char&
360
- VarName=ORIGIN_WAC&
361
- VarDesc=OriginWac&
362
- VarType=Num&
363
- VarName=DEST&
364
- VarDesc=Dest&
365
- VarType=Char&
366
- VarName=DEST_CITY_NAME&
367
- VarDesc=DestCityName&
368
- VarType=Char&
369
- VarName=DEST_CITY_NUM&
370
- VarDesc=DestCityNum&
371
- VarType=Num&
372
- VarName=DEST_STATE_ABR&
373
- VarDesc=DestState&
374
- VarType=Char&
375
- VarName=DEST_STATE_FIPS&
376
- VarDesc=DestStateFips&
377
- VarType=Char&
378
- VarName=DEST_STATE_NM&
379
- VarDesc=DestStateName&
380
- VarType=Char&
381
- VarName=DEST_COUNTRY&
382
- VarDesc=DestCountry&
383
- VarType=Char&
384
- VarName=DEST_COUNTRY_NAME&
385
- VarDesc=DestCountryName&
386
- VarType=Char&
387
- VarName=DEST_WAC&
388
- VarDesc=DestWac&
389
- VarType=Num&
390
- VarName=AIRCRAFT_GROUP&
391
- VarDesc=AircraftGroup&
392
- VarType=Num&
393
- VarName=AIRCRAFT_TYPE&
394
- VarDesc=AircraftType&
395
- VarType=Char&
396
- VarName=AIRCRAFT_CONFIG&
397
- VarDesc=AircraftConfig&
398
- VarType=Num&
399
- VarName=YEAR&
400
- VarDesc=Year&
401
- VarType=Num&
402
- VarName=QUARTER&
403
- VarDesc=Quarter&
404
- VarType=Num&
405
- VarName=MONTH&
406
- VarDesc=Month&
407
- VarType=Num&
408
- VarName=DISTANCE_GROUP&
409
- VarDesc=DistanceGroup&
410
- VarType=Num&
411
- VarName=CLASS&
412
- VarDesc=Class&
413
- VarType=Char&
414
- VarName=DATA_SOURCE&
415
- VarDesc=DataSource&
416
- VarType=Char
417
- }.gsub /[\s]+/,''
418
-
419
- data_miner do
420
- months = Hash.new
421
- # (2008..2009).each do |year|
422
- (2008..2008).each do |year|
423
- # (1..12).each do |month|
424
- (1..1).each do |month|
425
- time = Time.gm year, month
426
- form_data = FORM_DATA.dup
427
- form_data.gsub! '__YEAR__', time.year.to_s
428
- form_data.gsub! '__MONTH_NUMBER__', time.month.to_s
429
- form_data.gsub! '__MONTH_NAME__', time.strftime('%B')
430
- months[time] = form_data
431
- end
432
- end
433
- months.each do |month, form_data|
434
- import "T100 data from #{month.strftime('%B %Y')}",
435
- :url => URL,
436
- :form_data => form_data,
437
- :compression => :zip,
438
- :glob => '/*.csv' do
439
- key 'row_hash'
440
- store 'departures_scheduled', :field_name => 'DEPARTURES_SCHEDULED'
441
- store 'departures_performed', :field_name => 'DEPARTURES_PERFORMED'
442
- store 'payload', :field_name => 'PAYLOAD', :from_units => :pounds, :to_units => :kilograms
443
- store 'seats', :field_name => 'SEATS'
444
- store 'passengers', :field_name => 'PASSENGERS'
445
- store 'freight', :field_name => 'FREIGHT', :from_units => :pounds, :to_units => :kilograms
446
- store 'mail', :field_name => 'MAIL', :from_units => :pounds, :to_units => :kilograms
447
- store 'distance', :field_name => 'DISTANCE', :from_units => :miles, :to_units => :kilometres
448
- store 'ramp_to_ramp', :field_name => 'RAMP_TO_RAMP'
449
- store 'air_time', :field_name => 'AIR_TIME'
450
- store 'unique_carrier', :field_name => 'UNIQUE_CARRIER'
451
- store 'dot_airline_id', :field_name => 'AIRLINE_ID'
452
- store 'unique_carrier_name', :field_name => 'UNIQUE_CARRIER_NAME'
453
- store 'unique_carrier_entity', :field_name => 'UNIQUE_CARRIER_ENTITY'
454
- store 'region', :field_name => 'REGION'
455
- store 'carrier', :field_name => 'CARRIER'
456
- store 'carrier_name', :field_name => 'CARRIER_NAME'
457
- store 'carrier_group', :field_name => 'CARRIER_GROUP'
458
- store 'carrier_group_new', :field_name => 'CARRIER_GROUP_NEW'
459
- store 'origin_airport_iata', :field_name => 'ORIGIN'
460
- store 'origin_city_name', :field_name => 'ORIGIN_CITY_NAME'
461
- store 'origin_city_num', :field_name => 'ORIGIN_CITY_NUM'
462
- store 'origin_state_abr', :field_name => 'ORIGIN_STATE_ABR'
463
- store 'origin_state_fips', :field_name => 'ORIGIN_STATE_FIPS'
464
- store 'origin_state_nm', :field_name => 'ORIGIN_STATE_NM'
465
- store 'origin_country_iso_3166', :field_name => 'ORIGIN_COUNTRY'
466
- store 'origin_country_name', :field_name => 'ORIGIN_COUNTRY_NAME'
467
- store 'origin_wac', :field_name => 'ORIGIN_WAC'
468
- store 'dest_airport_iata', :field_name => 'DEST'
469
- store 'dest_city_name', :field_name => 'DEST_CITY_NAME'
470
- store 'dest_city_num', :field_name => 'DEST_CITY_NUM'
471
- store 'dest_state_abr', :field_name => 'DEST_STATE_ABR'
472
- store 'dest_state_fips', :field_name => 'DEST_STATE_FIPS'
473
- store 'dest_state_nm', :field_name => 'DEST_STATE_NM'
474
- store 'dest_country_iso_3166', :field_name => 'DEST_COUNTRY'
475
- store 'dest_country_name', :field_name => 'DEST_COUNTRY_NAME'
476
- store 'dest_wac', :field_name => 'DEST_WAC'
477
- store 'bts_aircraft_group', :field_name => 'AIRCRAFT_GROUP'
478
- store 'bts_aircraft_type', :field_name => 'AIRCRAFT_TYPE'
479
- store 'bts_aircraft_config', :field_name => 'AIRCRAFT_CONFIG'
480
- store 'year', :field_name => 'YEAR'
481
- store 'quarter', :field_name => 'QUARTER'
482
- store 'month', :field_name => 'MONTH'
483
- store 'bts_distance_group', :field_name => 'DISTANCE_GROUP'
484
- store 'bts_service_class', :field_name => 'CLASS'
485
- store 'data_source', :field_name => 'DATA_SOURCE'
486
- end
487
- end
488
-
489
- process 'Derive freight share as a fraction of payload' do
490
- update_all 'freight_share = (freight + mail) / payload', 'payload > 0'
491
- end
492
-
493
- process 'Derive load factor, which is passengers divided by the total seats available' do
494
- update_all 'load_factor = passengers / seats', 'passengers <= seats'
495
- end
496
-
497
- process 'Derive average seats per departure' do
498
- update_all 'seats_per_departure = seats / departures_performed', 'departures_performed > 0'
499
- end
500
- end
501
- end
502
-
503
# Test model that imports FAA ICAO aircraft designators, keyed on icao_code.
# Note that this depends on stuff in Aircraft: the import below uses
# Aircraft::BtsNameMatcher and Aircraft::BtsAircraftTypeCodeMatcher.
class AircraftDeux < ActiveRecord::Base
  set_primary_key :icao_code

  # Row predicates are defined on the class because the errata is declared
  # with the shorthand form (only a :url), in which case the resource class
  # itself acts as the responder. The method names are therefore part of the
  # external contract and must not be renamed.
  class << self
    # @param row [Hash] a parsed source row
    # @return [Boolean] true unless 'Manufacturer' matches /AEROSPATIALE/i
    def is_not_attributed_to_aerospatiale?(row)
      row['Manufacturer'] !~ /AEROSPATIALE/i
    end

    # @param row [Hash] a parsed source row
    # @return [Boolean] true unless 'Manufacturer' matches /CESSNA/i
    def is_not_attributed_to_cessna?(row)
      row['Manufacturer'] !~ /CESSNA/i
    end

    # @param row [Hash] a parsed source row
    # @return [Boolean] true unless 'Manufacturer' matches /LEAR/i
    def is_not_attributed_to_learjet?(row)
      row['Manufacturer'] !~ /LEAR/i
    end

    # @param row [Hash] a parsed source row
    # @return [Boolean] true unless 'Manufacturer' matches /DE ?HAVILLAND/i
    def is_not_attributed_to_dehavilland?(row)
      row['Manufacturer'] !~ /DE ?HAVILLAND/i
    end

    # @param row [Hash] a parsed source row
    # @return [Boolean] true unless 'Manufacturer' matches /MCDONNELL DOUGLAS/i
    def is_not_attributed_to_mcdonnell_douglas?(row)
      row['Manufacturer'] !~ /MCDONNELL DOUGLAS/i
    end

    # @param row [Hash] a parsed source row
    # @return [Boolean] true unless 'Model' matches /DC/i
    def is_not_a_dc_plane?(row)
      row['Model'] !~ /DC/i
    end

    # @param row [Hash] a parsed source row
    # @return [Boolean] true when 'Designator' equals 'crj9', ignoring case
    def is_a_crj_900?(row)
      # to_s keeps a row without a 'Designator' field from raising
      # NoMethodError; such a row simply is not a CRJ-900.
      row['Designator'].to_s.downcase == 'crj9'
    end
  end

  data_miner do
    # ('A'..'Z').each do |letter|
    # Note: for the purposes of testing, only importing "D"
    %w{ D }.each do |letter|
      import("ICAO codes starting with letter #{letter} used by the FAA",
             :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
             :encoding => 'windows-1252',
             :errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw' },
             :row_xpath => '//table/tr[2]/td/table/tr',
             :column_xpath => 'td') do
        key 'icao_code', :field_name => 'Designator'
        store 'bts_name', :matcher => Aircraft::BtsNameMatcher.new
        store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher.new
        store 'manufacturer_name', :field_name => 'Manufacturer'
        store 'name', :field_name => 'Model'
      end
    end
  end
end
557
-
558
# Test model holding one row per make / fleet / year, keyed by a name that is
# synthesized from those three source fields.
class AutomobileMakeFleetYear < ActiveRecord::Base
  set_primary_key :name

  # String-typed columns (no explicit :type given).
  col :name
  col :make_name
  col :fleet
  # Typed columns.
  col :year, :type => :integer
  col :fuel_efficiency, :type => :float
  col :fuel_efficiency_units
  col :volume, :type => :integer
  col :make_year_name
  col :created_at, :type => :datetime
  col :updated_at, :type => :datetime

  data_miner do
    # Only source rows whose 'volume' converts to a positive integer are imported.
    has_positive_volume = lambda { |record| record['volume'].to_i > 0 }

    # Builds the primary key from manufacturer, two characters of the fleet
    # field (starting at index 2) and the year, separated by single spaces.
    synthesized_name = lambda do |record|
      parts = [ record['manufacturer_name'], record['fleet'][2,2], record['year_content'] ]
      parts.join(' ')
    end

    process :auto_upgrade!

    # The two steps below let the test suite abort a run through globals.
    process "finish if i tell you to" do
      if $force_finish
        raise DataMiner::Finish
      end
    end

    process "skip if i tell you to" do
      if $force_skip
        raise DataMiner::Skip
      end
    end

    # CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA
    import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/make_fleet_years.csv',
           :errata => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv' },
           :select => has_positive_volume do
      key 'name', :synthesize => synthesized_name
      store 'make_name', :field_name => 'manufacturer_name'
      store 'year', :field_name => 'year_content'
      store 'fleet', :chars => 2..3 # zero-based
      store 'fuel_efficiency', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
      store 'volume'
    end
  end
end
596
-
597
# Schema-only test model: declares columns and two indexes (one explicitly
# named, one spanning several columns) and whose single data_miner step is
# :auto_upgrade!.
class CensusDivisionTrois < ActiveRecord::Base
  set_primary_key :number_code

  # Columns declared without an explicit :type.
  [:number_code, :name, :census_region_name].each do |column_name|
    col column_name
  end
  col :census_region_number, :type => :integer

  add_index 'census_region_name', :name => 'homefry'
  add_index %w{ number_code name census_region_name census_region_number }

  data_miner do
    process :auto_upgrade!
  end
end
611
-
612
# Schema-only test model like CensusDivisionTrois, but with no primary key
# set and only the single named index; its one data_miner step is
# :auto_upgrade!.
class CensusDivisionFour < ActiveRecord::Base
  # Columns declared without an explicit :type.
  [:number_code, :name, :census_region_name].each do |column_name|
    col column_name
  end
  col :census_region_number, :type => :integer

  add_index 'census_region_name', :name => 'homefry'

  data_miner do
    process :auto_upgrade!
  end
end
623
-
624
# TODO: have somebody properly organize these
#
# Tests for the data_miner DSL as used by the support models. Groups of tests
# are switched on through environment variables:
#   WIP          - work-in-progress tests (any value)
#   ALL=true     - every group below
#   FAST=true    - configuration, run-bookkeeping and small-import tests
#   SLOW=true    - full imports of the large data sets
class TestOldSyntax < Test::Unit::TestCase
  if ENV['WIP']
    context 'with nullify option' do
      should 'treat blank fields as null values' do
        Aircraft.delete_all
        Aircraft.data_miner_runs.delete_all
        Aircraft.run_data_miner!
        assert_greater_than 0, Aircraft.count
        assert_false Aircraft.where(:brighter_planet_aircraft_class_code => nil).empty?
      end
    end
  end

  if ENV['ALL'] == 'true'
    should 'directly create a table for the model' do
      if AutomobileMakeFleetYear.table_exists?
        ActiveRecord::Base.connection.execute 'DROP TABLE automobile_make_fleet_years;'
      end
      AutomobileMakeFleetYear.auto_upgrade!
      assert AutomobileMakeFleetYear.table_exists?
    end
  end

  if ENV['ALL'] == 'true' || ENV['FAST'] == 'true'
    should 'append to an existing config' do
      AutomobileFuelType.class_eval do
        data_miner :append => true do
          import 'example1', :url => 'http://example1.com' do
            key 'code'
            store 'name'
          end
        end
        data_miner :append => true do
          import 'example2', :url => 'http://example2.com' do
            key 'code'
            store 'name'
          end
        end
      end
      assert_equal 'http://example1.com', AutomobileFuelType.data_miner_config.steps[-2].table.url
      assert_equal 'http://example2.com', AutomobileFuelType.data_miner_config.steps[-1].table.url
    end

    should 'override an existing data_miner configuration' do
      AutomobileFuelType.class_eval do
        data_miner do
          import 'example', :url => 'http://example.com' do
            key 'code'
            store 'name'
          end
        end
      end
      assert_kind_of DataMiner::Import, AutomobileFuelType.data_miner_config.steps.first
      assert_equal 'http://example.com', AutomobileFuelType.data_miner_config.steps.first.table.url
    end

    should "stop and finish if it gets a DataMiner::Finish" do
      AutomobileMakeFleetYear.delete_all
      AutomobileMakeFleetYear.data_miner_runs.delete_all
      begin
        $force_finish = true
        AutomobileMakeFleetYear.run_data_miner!
        assert_equal 0, AutomobileMakeFleetYear.count
        assert(AutomobileMakeFleetYear.data_miner_runs.count > 0)
        assert(AutomobileMakeFleetYear.data_miner_runs.all? { |run| run.finished? && !run.skipped? && !run.killed? })
      ensure
        # Reset even when an assertion above fails, so the flag cannot leak
        # into later tests and make them finish early.
        $force_finish = false
      end
      AutomobileMakeFleetYear.run_data_miner!
      assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
    end

    should "stop and register skipped if it gets a DataMiner::Skip" do
      AutomobileMakeFleetYear.delete_all
      AutomobileMakeFleetYear.data_miner_runs.delete_all
      begin
        $force_skip = true
        AutomobileMakeFleetYear.run_data_miner!
        assert_equal 0, AutomobileMakeFleetYear.count
        assert(AutomobileMakeFleetYear.data_miner_runs.count > 0)
        assert(AutomobileMakeFleetYear.data_miner_runs.all? { |run| run.skipped? && !run.finished? && !run.killed? })
      ensure
        # Reset even when an assertion above fails, so the flag cannot leak
        # into later tests and make them skip.
        $force_skip = false
      end
      AutomobileMakeFleetYear.run_data_miner!
      assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
    end

    should "allow specifying dictionaries explicitly" do
      CensusDivisionDeux.run_data_miner!
      assert_equal 'South Region', CensusDivisionDeux.find(5).census_region_name
    end

    should "be able to key on things other than the primary key" do
      Aircraft.run_data_miner!
      assert_equal 'SP', Aircraft.find('DHC6').brighter_planet_aircraft_class_code
    end

    should "be able to synthesize rows without using a full parser class" do
      AutomobileMakeFleetYear.run_data_miner!
      assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
    end

    should "keep a call stack so that you can call run_data_miner! on a child" do
      CrosscallingCensusDivision.run_data_miner!
      assert CrosscallingCensusDivision.exists?(:name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region')
      assert CrosscallingCensusRegion.exists?(:name => 'West Region', :number => 4)
    end

    should "keep a call stack so that you can call run_data_miner! on a parent" do
      CrosscallingCensusRegion.run_data_miner!
      assert CrosscallingCensusDivision.exists?(:name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region')
      assert CrosscallingCensusRegion.exists?(:name => 'West Region', :number => 4)
    end

    should "import airports" do
      Airport.run_data_miner!
      assert(Airport.count > 0)
    end

    should "pull in census divisions using a data.brighterplanet.com dictionary" do
      CensusDivision.run_data_miner!
      assert(CensusDivision.count > 0)
    end

    should "have a way to queue up runs that works with delated_job's send_later" do
      assert AutomobileVariant.respond_to?(:run_data_miner!)
    end

    should "be idempotent" do
      # Running the same config twice must not change the row count.
      Country.data_miner_config.run
      a = Country.count
      Country.data_miner_config.run
      b = Country.count
      assert_equal a, b

      CensusRegion.data_miner_config.run
      a = CensusRegion.count
      CensusRegion.data_miner_config.run
      b = CensusRegion.count
      assert_equal a, b
    end

    should "hash things" do
      AutomobileVariant.data_miner_config.steps[0].run
      assert AutomobileVariant.first.row_hash.present?
    end

    should "process a callback block instead of a method" do
      AutomobileVariant.delete_all
      AutomobileVariant.data_miner_config.steps[0].run
      assert !AutomobileVariant.first.fuel_efficiency_city.present?
      AutomobileVariant.data_miner_config.steps.last.run
      assert AutomobileVariant.first.fuel_efficiency_city.present?
    end

    should "keep a log when it does a run" do
      approx_started_at = Time.now
      DataMiner.run :resource_names => %w{ Country }
      approx_terminated_at = Time.now
      last_run = DataMiner::Run.first(:conditions => { :resource_name => 'Country' }, :order => 'id DESC')
      assert((last_run.started_at - approx_started_at).abs < 5) # seconds
      assert((last_run.terminated_at - approx_terminated_at).abs < 5) # seconds
    end

    should "request a re-import from scratch" do
      c = Country.new
      c.iso_3166 = 'JUNK'
      c.save!
      assert Country.exists?(:iso_3166 => 'JUNK')
      DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
      assert !Country.exists?(:iso_3166 => 'JUNK')
    end

    should "know what runs were on a resource" do
      DataMiner.run :resource_names => %w{ Country }
      DataMiner.run :resource_names => %w{ Country }
      assert(Country.data_miner_runs.count > 0)
    end
  end

  if ENV['ALL'] == 'true' || ENV['SLOW'] == 'true'
    should "allow errata to be specified with a shorthand, assuming the responder is the resource class itself" do
      AircraftDeux.run_data_miner!
      assert AircraftDeux.exists?(:icao_code => 'DC91', :bts_aircraft_type_code => '630')
    end

    should "mine aircraft" do
      Aircraft.run_data_miner!
      assert Aircraft.exists?(:icao_code => 'DC91', :bts_aircraft_type_code => '630')
    end

    should "mine automobile variants" do
      AutomobileVariant.run_data_miner!
      # Filter with where(...): a string handed straight to count is treated
      # as the column expression to count, not as a condition.
      assert(AutomobileVariant.where('make_name LIKE ?', '%tesla').count > 0)
    end

    should "mine T100 flight segments" do
      T100FlightSegment.run_data_miner!
      # Same as above: the LIKE must be a condition, not the counted column.
      assert(T100FlightSegment.where('dest_country_name LIKE ?', '%United States').count > 0)
    end

    should "mine residence survey responses" do
      ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
      assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.start_with?('Single-family detached house')
    end
  end
end