data_miner-ruby19 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README.rdoc +271 -0
- data/lib/data_miner.rb +136 -0
- data/lib/data_miner/attribute.rb +233 -0
- data/lib/data_miner/base.rb +194 -0
- data/lib/data_miner/dictionary.rb +36 -0
- data/lib/data_miner/import.rb +70 -0
- data/lib/data_miner/process.rb +37 -0
- data/lib/data_miner/run.rb +26 -0
- data/lib/data_miner/schema.rb +244 -0
- data/lib/data_miner/tap.rb +146 -0
- data/test/data_miner_test.rb +1399 -0
- data/test/test_helper.rb +307 -0
- metadata +330 -0
@@ -0,0 +1,146 @@
|
|
1
|
+
|
2
|
+
module DataMiner
|
3
|
+
class Tap
|
4
|
+
attr_reader :base
|
5
|
+
attr_reader :position_in_run
|
6
|
+
attr_reader :description
|
7
|
+
attr_reader :source
|
8
|
+
attr_reader :options
|
9
|
+
delegate :resource, :to => :base
|
10
|
+
|
11
|
+
# Builds a tap step.
#
# base            - object exposing +resource+ (see the +delegate+ declaration).
# position_in_run - index of this step in the run; anything other than 0 is
#                   reported through DataMiner.log_or_raise.
# description     - human-readable label, used by #inspect.
# source          - passed verbatim to +taps pull+ as the remote source.
# options         - hash of overrides; :source_table_name is read by
#                   #source_table_name and the remaining keys are merged into
#                   the connection config by #db_config.
def initialize(base, position_in_run, description, source, options = {})
  # Symbolize a copy rather than the argument itself, so the hash the caller
  # passed in is not modified behind its back.
  options = options.symbolize_keys
  DataMiner.log_or_raise "Tap has to be the first step." unless position_in_run == 0
  @base = base
  @position_in_run = position_in_run
  @description = description
  @source = source
  @options = options
end
|
20
|
+
|
21
|
+
def inspect
|
22
|
+
"Tap(#{resource}): #{description} (#{source})"
|
23
|
+
end
|
24
|
+
|
25
|
+
# Executes the tap: drops any table that would be in the way, shells out to
# +taps pull+, renames the pulled table when its source name differs from the
# resource's table name, and logs completion. The +run+ argument is not used.
def run(run)
  obstacles = [ source_table_name, resource.table_name ]
  obstacles.each do |table|
    next unless connection.table_exists?(table)
    connection.drop_table(table)
  end

  DataMiner.backtick_with_reporting(taps_pull_cmd)
  connection.rename_table(source_table_name, resource.table_name) if needs_table_rename?
  DataMiner.log_info("ran #{inspect}")
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def connection
|
41
|
+
ActiveRecord::Base.connection
|
42
|
+
end
|
43
|
+
|
44
|
+
def db_config
|
45
|
+
@_db_config ||= connection.instance_variable_get(:@config).dup.merge(options.except(:source_table_name))
|
46
|
+
end
|
47
|
+
|
48
|
+
def source_table_name
|
49
|
+
options[:source_table_name] || resource.table_name
|
50
|
+
end
|
51
|
+
|
52
|
+
def needs_table_rename?
|
53
|
+
source_table_name != resource.table_name
|
54
|
+
end
|
55
|
+
|
56
|
+
# Maps the connection's adapter name onto the short name used in this class
# ('mysql', 'postgres' or 'sqlite'). Candidates are tried in that order,
# case-insensitively; returns nil when none of them matches.
def adapter
  reported = connection.adapter_name
  %w{ mysql postgres sqlite }.detect do |family|
    Regexp.new(family, Regexp::IGNORECASE) === reported
  end
end
|
66
|
+
|
67
|
+
# never optional
|
68
|
+
def database
|
69
|
+
db_config[:database]
|
70
|
+
end
|
71
|
+
|
72
|
+
DEFAULT_PORTS = {
|
73
|
+
'mysql' => 3306,
|
74
|
+
'postgres' => 5432
|
75
|
+
}
|
76
|
+
|
77
|
+
DEFAULT_USERNAMES = {
|
78
|
+
'mysql' => 'root',
|
79
|
+
'postgres' => ''
|
80
|
+
}
|
81
|
+
|
82
|
+
DEFAULT_PASSWORDS = {}
|
83
|
+
DEFAULT_PASSWORDS.default = ''
|
84
|
+
|
85
|
+
DEFAULT_HOSTS = {}
|
86
|
+
DEFAULT_HOSTS.default = 'localhost'
|
87
|
+
|
88
|
+
%w{ username password port host }.each do |x|
|
89
|
+
module_eval %{
|
90
|
+
def #{x}
|
91
|
+
db_config[:#{x}] || DEFAULT_#{x.upcase}S[adapter]
|
92
|
+
end
|
93
|
+
}
|
94
|
+
end
|
95
|
+
|
96
|
+
# Builds the part of the local database URL that follows "<adapter>://".
# For sqlite this is just the database value; for mysql and postgres it is
# "username:password@host:port/database". Returns nil for any other adapter.
def db_locator
  kind = adapter
  if kind == 'sqlite'
    database
  elsif kind == 'mysql' || kind == 'postgres'
    "#{username}:#{password}@#{host}:#{port}/#{database}"
  end
end
|
104
|
+
|
105
|
+
# taps pull mysql://root:password@localhost/taps_test http://foo:bar@data.brighterplanet.com:5000 --tables aircraft
|
106
|
+
def taps_pull_cmd
|
107
|
+
Escape.shell_command [
|
108
|
+
'taps',
|
109
|
+
'pull',
|
110
|
+
"#{adapter}://#{db_locator}",
|
111
|
+
source,
|
112
|
+
'--indexes-first',
|
113
|
+
'--tables',
|
114
|
+
source_table_name
|
115
|
+
]
|
116
|
+
# "taps pull #{source} --indexes-first --tables #{source_table_name}"
|
117
|
+
end
|
118
|
+
|
119
|
+
# 2.3.5 mysql
|
120
|
+
# * <tt>:host</tt> - Defaults to "localhost".
|
121
|
+
# * <tt>:port</tt> - Defaults to 3306.
|
122
|
+
# * <tt>:socket</tt> - Defaults to "/tmp/mysql.sock".
|
123
|
+
# * <tt>:username</tt> - Defaults to "root"
|
124
|
+
# * <tt>:password</tt> - Defaults to nothing.
|
125
|
+
# * <tt>:database</tt> - The name of the database. No default, must be provided.
|
126
|
+
# * <tt>:encoding</tt> - (Optional) Sets the client encoding by executing "SET NAMES <encoding>" after connection.
|
127
|
+
# * <tt>:reconnect</tt> - Defaults to false (See MySQL documentation: http://dev.mysql.com/doc/refman/5.0/en/auto-reconnect.html).
|
128
|
+
# * <tt>:sslca</tt> - Necessary to use MySQL with an SSL connection.
|
129
|
+
# * <tt>:sslkey</tt> - Necessary to use MySQL with an SSL connection.
|
130
|
+
# * <tt>:sslcert</tt> - Necessary to use MySQL with an SSL connection.
|
131
|
+
# * <tt>:sslcapath</tt> - Necessary to use MySQL with an SSL connection.
|
132
|
+
# * <tt>:sslcipher</tt> - Necessary to use MySQL with an SSL connection.
|
133
|
+
# 2.3.5 postgresql
|
134
|
+
# * <tt>:host</tt> - Defaults to "localhost".
|
135
|
+
# * <tt>:port</tt> - Defaults to 5432.
|
136
|
+
# * <tt>:username</tt> - Defaults to nothing.
|
137
|
+
# * <tt>:password</tt> - Defaults to nothing.
|
138
|
+
# * <tt>:database</tt> - The name of the database. No default, must be provided.
|
139
|
+
# * <tt>:schema_search_path</tt> - An optional schema search path for the connection given as a string of comma-separated schema names. This is backward-compatible with the <tt>:schema_order</tt> option.
|
140
|
+
# * <tt>:encoding</tt> - An optional client encoding that is used in a <tt>SET client_encoding TO <encoding></tt> call on the connection.
|
141
|
+
# * <tt>:min_messages</tt> - An optional client min messages that is used in a <tt>SET client_min_messages TO <min_messages></tt> call on the connection.
|
142
|
+
# * <tt>:allow_concurrency</tt> - If true, use async query methods so Ruby threads don't deadlock; otherwise, use blocking query methods.
|
143
|
+
# 2.3.5 sqlite[3]
|
144
|
+
# * <tt>:database</tt> - Path to the database file.
|
145
|
+
end
|
146
|
+
end
|
@@ -0,0 +1,1399 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class AutomobileFuelType < ActiveRecord::Base
|
4
|
+
set_primary_key :code
|
5
|
+
|
6
|
+
data_miner do
|
7
|
+
import(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
|
8
|
+
:filename => 'Gd6-dsc.txt',
|
9
|
+
:format => :fixed_width,
|
10
|
+
:crop => 21..26, # inclusive
|
11
|
+
:cut => '2-',
|
12
|
+
:select => lambda { |row| /\A[A-Z]/.match row[:code] },
|
13
|
+
:schema => [[ 'code', 2, { :type => :string } ],
|
14
|
+
[ 'spacer', 2 ],
|
15
|
+
[ 'name', 52, { :type => :string } ]]) do
|
16
|
+
key 'code'
|
17
|
+
store 'name'
|
18
|
+
end
|
19
|
+
|
20
|
+
import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/models_export/automobile_fuel_type.csv' do
|
21
|
+
key 'code'
|
22
|
+
store 'name'
|
23
|
+
store 'annual_distance'
|
24
|
+
store 'emission_factor'
|
25
|
+
end
|
26
|
+
|
27
|
+
# pull electricity emission factor from residential electricity
|
28
|
+
import(:url => 'http://spreadsheets.google.com/pub?key=rukxnmuhhsOsrztTrUaFCXQ',
|
29
|
+
:select => lambda { |row| row['code'] == 'El' }) do
|
30
|
+
key 'code'
|
31
|
+
store 'name'
|
32
|
+
store 'emission_factor'
|
33
|
+
end
|
34
|
+
|
35
|
+
# still need distance estimate for electric cars
|
36
|
+
end
|
37
|
+
|
38
|
+
CODES = {
|
39
|
+
:electricity => 'El',
|
40
|
+
:diesel => 'D'
|
41
|
+
}
|
42
|
+
end
|
43
|
+
|
44
|
+
class AutomobileVariant < ActiveRecord::Base
|
45
|
+
set_primary_key :row_hash
|
46
|
+
|
47
|
+
module FuelEconomyGuide
|
48
|
+
TRANSMISSIONS = {
|
49
|
+
'A' => 'automatic',
|
50
|
+
'M' => 'manual',
|
51
|
+
'L' => 'automatic', # Lockup/automatic
|
52
|
+
'S' => 'semiautomatic', # Semiautomatic
|
53
|
+
'C' => 'manual' # TODO verify for VW Syncro
|
54
|
+
}
|
55
|
+
|
56
|
+
ENGINE_TYPES = {
|
57
|
+
'(GUZZLER)' => nil, # "gas guzzler"
|
58
|
+
'(POLICE)' => nil, # police automobile_variant
|
59
|
+
'(MPFI)' => 'injection',
|
60
|
+
'(MPI*)' => 'injection',
|
61
|
+
'(SPFI)' => 'injection',
|
62
|
+
'(FFS)' => 'injection',
|
63
|
+
'(TURBO)' => 'turbo',
|
64
|
+
'(TRBO)' => 'turbo',
|
65
|
+
'(TC*)' => 'turbo',
|
66
|
+
'(FFS,TRBO)' => %w(injection turbo),
|
67
|
+
'(S-CHARGE)' => 'supercharger',
|
68
|
+
'(SC*)' => 'supercharger',
|
69
|
+
'(DIESEL)' => nil, # diesel
|
70
|
+
'(DSL)' => nil, # diesel
|
71
|
+
'(ROTARY)' => nil, # rotary
|
72
|
+
'(VARIABLE)' => nil, # variable displacement
|
73
|
+
'(NO-CAT)' => nil, # no catalytic converter
|
74
|
+
'(OHC)' => nil, # overhead camshaft
|
75
|
+
'(OHV)' => nil, # overhead valves
|
76
|
+
'(16-VALVE)' => nil, # 16V
|
77
|
+
'(305)' => nil, # 305 cubic inch displacement
|
78
|
+
'(307)' => nil, # 307 cubic inch displacement
|
79
|
+
'(M-ENG)' => nil,
|
80
|
+
'(W-ENG)' => nil,
|
81
|
+
'(GM-BUICK)' => nil,
|
82
|
+
'(GM-CHEV)' => nil,
|
83
|
+
'(GM-OLDS)' => nil,
|
84
|
+
'(GM-PONT)' => nil,
|
85
|
+
}
|
86
|
+
|
87
|
+
class ParserB
|
88
|
+
attr_accessor :year
|
89
|
+
def initialize(options = {})
|
90
|
+
@year = options[:year]
|
91
|
+
end
|
92
|
+
|
93
|
+
def apply(row)
|
94
|
+
row.merge!({
|
95
|
+
'make' => row['carline_mfr_name'], # make it line up with the errata
|
96
|
+
'model' => row['carline_name'], # ditto
|
97
|
+
'transmission' => TRANSMISSIONS[row['model_trans'][0, 1]],
|
98
|
+
'speeds' => (row['model_trans'][1, 1] == 'V') ? 'variable' : row['model_trans'][1, 1],
|
99
|
+
'turbo' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('turbo'),
|
100
|
+
'supercharger' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('supercharger'),
|
101
|
+
'injection' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('injection'),
|
102
|
+
'displacement' => _displacement(row['opt_disp']),
|
103
|
+
'year' => year
|
104
|
+
})
|
105
|
+
row
|
106
|
+
end
|
107
|
+
|
108
|
+
# Parses an optional-displacement field into a Float.
#
# Parentheses and surrounding whitespace are discarded first. A value ending
# in "L" is returned as-is; a value ending in "CC" is divided by 1000.
# Returns nil when the field is nil, empty, or carries neither suffix.
def _displacement(str)
  # to_s makes a missing (nil) field behave like an empty one instead of raising.
  cleaned = str.to_s.gsub(/[\(\)]/, '').strip
  # \A and \z anchor the whole string; ^ and $ would also match a single
  # line inside a multi-line value.
  if cleaned =~ /\A(.+)L\z/
    $1.to_f
  elsif cleaned =~ /\A(.+)CC\z/
    $1.to_f / 1000
  end
end
|
116
|
+
|
117
|
+
def add_hints!(bus)
|
118
|
+
bus[:format] = :fixed_width
|
119
|
+
bus[:cut] = '13-' if year == 1995
|
120
|
+
bus[:schema_name] = :fuel_economy_guide_b
|
121
|
+
bus[:select] = lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' }
|
122
|
+
Slither.define :fuel_economy_guide_b do |d|
|
123
|
+
d.rows do |row|
|
124
|
+
row.trap { true } # there's only one section
|
125
|
+
row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR
|
126
|
+
row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA
|
127
|
+
row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE
|
128
|
+
row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE
|
129
|
+
row.column 'carline_name' , 28, :type => :string # CARLINE NAME
|
130
|
+
row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES
|
131
|
+
row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
|
132
|
+
row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE
|
133
|
+
row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS
|
134
|
+
row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
|
135
|
+
row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
|
136
|
+
row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE
|
137
|
+
row.column 'carline_code' , 5, :type => :integer # CARLINE CODE
|
138
|
+
row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX
|
139
|
+
row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME
|
140
|
+
row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
|
141
|
+
row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
|
142
|
+
row.spacer 2
|
143
|
+
row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
|
144
|
+
row.spacer 2
|
145
|
+
row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON
|
146
|
+
row.spacer 2
|
147
|
+
row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON
|
148
|
+
row.spacer 2
|
149
|
+
row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
|
150
|
+
row.spacer 2
|
151
|
+
row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
|
152
|
+
row.spacer 2
|
153
|
+
row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
|
154
|
+
row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT
|
155
|
+
row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1
|
156
|
+
row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2
|
157
|
+
row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3
|
158
|
+
row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
159
|
+
row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
160
|
+
row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
|
161
|
+
row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY
|
162
|
+
row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
|
163
|
+
row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
|
164
|
+
row.column 'filler' , 1, :type => :string # NOT USED
|
165
|
+
row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
|
166
|
+
row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
171
|
+
# Row transformer used for the 1998--2005 imports (see the :transform option
# that names this class). Adds normalized keys to each row.
class ParserC
  attr_accessor :year

  # options - hash; only :year is read.
  def initialize(options = {})
    @year = options[:year]
  end

  # Nothing to add here: the file format is decided from the filename.
  def add_hints!(bus)
  end

  # Merges the normalized keys into +row+ in place and returns the same row.
  def apply(row)
    normalized = {
      'make' => row['Manufacturer'], # make it line up with the errata
      'model' => row['carline name'], # ditto
      'drive' => row['drv'] + 'WD'
    }

    # The third- and second-to-last characters of 'trans' carry the
    # transmission code and the speed code respectively.
    trans = row['trans']
    normalized['transmission'] = TRANSMISSIONS[trans[-3, 1]]
    gear = trans[-2, 1]
    normalized['speeds'] = gear == 'V' ? 'variable' : gear

    normalized['turbo'] = (row['T'] == 'T')
    normalized['supercharger'] = (row['S'] == 'S')
    normalized['injection'] = true
    normalized['year'] = year

    row.update(normalized)
    row
  end
end
|
196
|
+
# Row transformer used for the 2006--2010 imports (see the :transform option
# that names this class). Adds normalized keys to each row.
class ParserD
  attr_accessor :year

  # options - hash; only :year is read.
  def initialize(options = {})
    @year = options[:year]
  end

  # For 2007 only, installs a :reject hint that drops rows whose first value
  # is blank. Other years leave +bus+ untouched.
  def add_hints!(bus)
    return unless year == 2007
    bus[:reject] = lambda { |row| row.values.first.blank? }
  end

  # Merges the normalized keys into +row+ in place and returns the same row.
  def apply(row)
    normalized = {
      'make' => row['MFR'], # make it line up with the errata
      'model' => row['CAR LINE'], # ditto
      'drive' => row['DRIVE SYS'] + 'WD'
    }

    # The third- and second-to-last characters of 'TRANS' carry the
    # transmission code and the speed code respectively.
    trans = row['TRANS']
    normalized['transmission'] = TRANSMISSIONS[trans[-3, 1]]
    gear = trans[-2, 1]
    normalized['speeds'] = gear == 'V' ? 'variable' : gear

    normalized['turbo'] = (row['TURBO'] == 'T')
    normalized['supercharger'] = (row['SPCHGR'] == 'S')
    normalized['injection'] = true
    normalized['year'] = year

    row.update(normalized)
    row
  end
end
|
221
|
+
end
|
222
|
+
|
223
|
+
# Responder handed to Errata.new; each predicate takes a row hash and
# answers one question about it.
class Guru
  # the following matching methods are needed by the errata
  # per https://brighterplanet.sifterapp.com/projects/30/issues/750/comments
  #
  # Every predicate tolerates a missing (nil) 'make', 'model' or 'MFR' value:
  # the string comparisons go through to_s, matching the behaviour of the
  # regexp-based predicates, which already return nil in that case.

  def transmission_is_blank?(row)
    row['transmission'].blank?
  end

  def is_a_2007_gmc_or_chevrolet?(row)
    row['year'] == 2007 && %w(GMC CHEVROLET).include?(row['MFR'].to_s.upcase)
  end

  def is_a_porsche?(row)
    row['make'].to_s.upcase == 'PORSCHE'
  end

  def is_not_a_porsche?(row)
    !is_a_porsche?(row)
  end

  def is_a_mercedes_benz?(row)
    row['make'] =~ /MERCEDES/i
  end

  def is_a_lexus?(row)
    row['make'].to_s.upcase == 'LEXUS'
  end

  def is_a_bmw?(row)
    row['make'].to_s.upcase == 'BMW'
  end

  def is_a_ford?(row)
    row['make'].to_s.upcase == 'FORD'
  end

  def is_a_rolls_royce_and_model_contains_bentley?(row)
    is_a_rolls_royce?(row) && model_contains_bentley?(row)
  end

  def is_a_bentley?(row)
    row['make'].to_s.upcase == 'BENTLEY'
  end

  def is_a_rolls_royce?(row)
    row['make'] =~ /ROLLS/i
  end

  def is_a_turbo_brooklands?(row)
    row['model'] =~ /TURBO R\/RL BKLDS/i
  end

  def model_contains_maybach?(row)
    row['model'] =~ /MAYBACH/i
  end

  def model_contains_bentley?(row)
    row['model'] =~ /BENTLEY/i
  end
end
|
283
|
+
|
284
|
+
errata = Errata.new :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv',
|
285
|
+
:responder => AutomobileVariant::Guru.new
|
286
|
+
|
287
|
+
data_miner do
|
288
|
+
# 1985---1997
|
289
|
+
(85..97).each do |yy|
|
290
|
+
filename = (yy == 96) ? "#{yy}MFGUI.ASC" : "#{yy}MFGUI.DAT"
|
291
|
+
import(:url => "http://www.fueleconomy.gov/FEG/epadata/#{yy}mfgui.zip",
|
292
|
+
:filename => filename,
|
293
|
+
:transform => { :class => FuelEconomyGuide::ParserB, :year => "19#{yy}".to_i },
|
294
|
+
:errata => errata) do
|
295
|
+
key 'row_hash'
|
296
|
+
store 'make_name', :field_name => 'make'
|
297
|
+
store 'model_name', :field_name => 'model'
|
298
|
+
store 'year'
|
299
|
+
store 'fuel_type_code', :field_name => 'fuel_type'
|
300
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
301
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
302
|
+
store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
303
|
+
store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
304
|
+
store 'cylinders', :field_name => 'no_cyc'
|
305
|
+
store 'drive', :field_name => 'drive_system'
|
306
|
+
store 'carline_mfr_code'
|
307
|
+
store 'vi_mfr_code'
|
308
|
+
store 'carline_code'
|
309
|
+
store 'carline_class_code', :field_name => 'carline_clss'
|
310
|
+
store 'transmission'
|
311
|
+
store 'speeds'
|
312
|
+
store 'turbo'
|
313
|
+
store 'supercharger'
|
314
|
+
store 'injection'
|
315
|
+
store 'displacement'
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
319
|
+
# 1998--2005
|
320
|
+
{
|
321
|
+
1998 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv' },
|
322
|
+
1999 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/99guide.zip', :filename => '99guide6.csv' },
|
323
|
+
2000 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls' },
|
324
|
+
2001 => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/01guide0918.csv' }, # parseexcel 0.5.2 can't read Excel 5.0 { :url => 'http://www.fueleconomy.gov/FEG/epadata/01data.zip', :filename => '01guide0918.xls' }
|
325
|
+
2002 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls' },
|
326
|
+
2003 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/03data.zip', :filename => 'guide_2003_feb04-03b.csv' },
|
327
|
+
2004 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/04data.zip', :filename => 'gd04-Feb1804-RelDtFeb20.csv' },
|
328
|
+
2005 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/05data.zip', :filename => 'guide2005-2004oct15.csv' }
|
329
|
+
}.sort { |a, b| a.first <=> b.first }.each do |year, options|
|
330
|
+
import options.merge(:transform => { :class => FuelEconomyGuide::ParserC, :year => year },
|
331
|
+
:errata => errata) do
|
332
|
+
key 'row_hash'
|
333
|
+
store 'make_name', :field_name => 'make'
|
334
|
+
store 'model_name', :field_name => 'model'
|
335
|
+
store 'fuel_type_code', :field_name => 'fl'
|
336
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
337
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
338
|
+
store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
339
|
+
store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
340
|
+
store 'cylinders', :field_name => 'cyl'
|
341
|
+
store 'displacement', :field_name => 'displ'
|
342
|
+
store 'carline_class_code', :field_name => 'cls' if year >= 2000
|
343
|
+
store 'carline_class_name', :field_name => 'Class'
|
344
|
+
store 'year'
|
345
|
+
store 'transmission'
|
346
|
+
store 'speeds'
|
347
|
+
store 'turbo'
|
348
|
+
store 'supercharger'
|
349
|
+
store 'injection'
|
350
|
+
store 'drive'
|
351
|
+
end
|
352
|
+
end
|
353
|
+
|
354
|
+
# 2006--2010
|
355
|
+
{
|
356
|
+
2006 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/06data.zip', :filename => '2006_FE_Guide_14-Nov-2005_download.csv' },
|
357
|
+
2007 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/07data.zip', :filename => '2007_FE_guide_ALL_no_sales_May_01_2007.xls' },
|
358
|
+
2008 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv' },
|
359
|
+
2009 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/09data.zip', :filename => '2009_FE_guide for DOE_ALL-rel dates-no-sales-8-28-08download.csv' },
|
360
|
+
# 2010 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/10data.zip', :filename => '2010FEguide-for DOE-rel dates before 10-16-09-no-sales10-8-09public.xls' }
|
361
|
+
}.sort { |a, b| a.first <=> b.first }.each do |year, options|
|
362
|
+
import options.merge(:transform => { :class => FuelEconomyGuide::ParserD, :year => year },
|
363
|
+
:errata => errata) do
|
364
|
+
key 'row_hash'
|
365
|
+
store 'make_name', :field_name => 'make'
|
366
|
+
store 'model_name', :field_name => 'model'
|
367
|
+
store 'fuel_type_code', :field_name => 'FUEL TYPE'
|
368
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
369
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
370
|
+
store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
371
|
+
store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
372
|
+
store 'cylinders', :field_name => 'NUMB CYL'
|
373
|
+
store 'displacement', :field_name => 'DISPLACEMENT'
|
374
|
+
store 'carline_class_code', :field_name => 'CLS'
|
375
|
+
store 'carline_class_name', :field_name => 'CLASS'
|
376
|
+
store 'year'
|
377
|
+
store 'transmission'
|
378
|
+
store 'speeds'
|
379
|
+
store 'turbo'
|
380
|
+
store 'supercharger'
|
381
|
+
store 'injection'
|
382
|
+
store 'drive'
|
383
|
+
end
|
384
|
+
end
|
385
|
+
|
386
|
+
# associate :make, :key => :original_automobile_make_name, :foreign_key => :name
|
387
|
+
# derive :automobile_model_id # creates models by name
|
388
|
+
# associate :fuel_type, :key => :original_automobile_fuel_type_code, :foreign_key => :code
|
389
|
+
|
390
|
+
process 'Set adjusted fuel economy' do
|
391
|
+
update_all 'fuel_efficiency_city = 1 / ((0.003259 / 0.425143707) + (1.1805 / raw_fuel_efficiency_city))'
|
392
|
+
update_all 'fuel_efficiency_highway = 1 / ((0.001376 / 0.425143707) + (1.3466 / raw_fuel_efficiency_highway))'
|
393
|
+
end
|
394
|
+
end
|
395
|
+
|
396
|
+
def name
|
397
|
+
extra = []
|
398
|
+
extra << "V#{cylinders}" if cylinders
|
399
|
+
extra << "#{displacement}L" if displacement
|
400
|
+
extra << "turbo" if turbo
|
401
|
+
extra << "FI" if injection
|
402
|
+
extra << "#{speeds}spd" if speeds.present?
|
403
|
+
extra << transmission if transmission.present?
|
404
|
+
extra << "(#{fuel_type.name})" if fuel_type
|
405
|
+
extra.join(' ')
|
406
|
+
end
|
407
|
+
|
408
|
+
def fuel_economy_description
|
409
|
+
[ fuel_efficiency_city, fuel_efficiency_highway ].map { |f| f.kilometres_per_litre.to(:miles_per_gallon).round }.join('/')
|
410
|
+
end
|
411
|
+
end
|
412
|
+
|
413
|
+
class Country < ActiveRecord::Base
|
414
|
+
set_primary_key :iso_3166
|
415
|
+
|
416
|
+
data_miner do
|
417
|
+
import 'The official ISO country list', :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do
|
418
|
+
key 'iso_3166', :field_number => 1
|
419
|
+
store 'name', :field_number => 0
|
420
|
+
end
|
421
|
+
|
422
|
+
import 'A Princeton dataset with better capitalization', :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do
|
423
|
+
key 'iso_3166', :field_name => 'country code'
|
424
|
+
store 'name', :field_name => 'country'
|
425
|
+
end
|
426
|
+
end
|
427
|
+
end
|
428
|
+
|
429
|
+
class Airport < ActiveRecord::Base
|
430
|
+
set_primary_key :iata_code
|
431
|
+
|
432
|
+
data_miner do
|
433
|
+
import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do
|
434
|
+
key 'iata_code', :field_number => 4
|
435
|
+
store 'name', :field_number => 1
|
436
|
+
store 'city', :field_number => 2
|
437
|
+
store 'country_name', :field_number => 3
|
438
|
+
store 'latitude', :field_number => 6
|
439
|
+
store 'longitude', :field_number => 7
|
440
|
+
end
|
441
|
+
end
|
442
|
+
end
|
443
|
+
|
444
|
+
class TappedAirport < ActiveRecord::Base
|
445
|
+
set_primary_key :iata_code
|
446
|
+
|
447
|
+
data_miner do
|
448
|
+
tap "Brighter Planet's sanitized airports table", "http://carbon:neutral@data.brighterplanet.com:5001", :source_table_name => 'airports'
|
449
|
+
# tap "Brighter Planet's sanitized airports table", "http://carbon:neutral@localhost:5000", :source_table_name => 'airports'
|
450
|
+
end
|
451
|
+
end
|
452
|
+
|
453
|
+
class CensusRegion < ActiveRecord::Base
|
454
|
+
set_primary_key :number
|
455
|
+
|
456
|
+
data_miner do
|
457
|
+
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
|
458
|
+
key 'number', :field_name => 'Region'
|
459
|
+
store 'name', :field_name => 'Name'
|
460
|
+
end
|
461
|
+
|
462
|
+
# pretend this is a different data source
|
463
|
+
# fake! just for testing purposes
|
464
|
+
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do
|
465
|
+
key 'number', :field_name => 'Region'
|
466
|
+
store 'name', :field_name => 'Name'
|
467
|
+
end
|
468
|
+
end
|
469
|
+
end
|
470
|
+
|
471
|
+
# smaller than a region
|
472
|
+
class CensusDivision < ActiveRecord::Base
|
473
|
+
set_primary_key :number
|
474
|
+
|
475
|
+
data_miner do
|
476
|
+
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
477
|
+
key 'number', :field_name => 'Division'
|
478
|
+
store 'name', :field_name => 'Name'
|
479
|
+
store 'census_region_number', :field_name => 'Region'
|
480
|
+
store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
|
481
|
+
end
|
482
|
+
end
|
483
|
+
end
|
484
|
+
|
485
|
+
class CensusDivisionDeux < ActiveRecord::Base
|
486
|
+
set_primary_key :number
|
487
|
+
|
488
|
+
data_miner do
|
489
|
+
import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
490
|
+
key 'number', :field_name => 'Division'
|
491
|
+
store 'name', :field_name => 'Name'
|
492
|
+
store 'census_region_number', :field_name => 'Region'
|
493
|
+
store 'census_region_name', :field_name => 'Region', :dictionary => DataMiner::Dictionary.new(:input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv')
|
494
|
+
end
|
495
|
+
end
|
496
|
+
end
|
497
|
+
|
498
|
+
class CrosscallingCensusRegion < ActiveRecord::Base
|
499
|
+
set_primary_key :number
|
500
|
+
|
501
|
+
has_many :crosscalling_census_divisions
|
502
|
+
|
503
|
+
data_miner do
|
504
|
+
process "derive ourselves from the census divisions table (i.e., cross call census divisions)" do
|
505
|
+
CrosscallingCensusDivision.run_data_miner!
|
506
|
+
connection.create_table :crosscalling_census_regions, :options => 'ENGINE=InnoDB default charset=utf8', :id => false, :force => true do |t|
|
507
|
+
t.column :number, :integer
|
508
|
+
t.column :name, :string
|
509
|
+
end
|
510
|
+
connection.execute 'ALTER TABLE crosscalling_census_regions ADD PRIMARY KEY (number);'
|
511
|
+
connection.execute %{
|
512
|
+
INSERT IGNORE INTO crosscalling_census_regions(number, name)
|
513
|
+
SELECT crosscalling_census_divisions.census_region_number, crosscalling_census_divisions.census_region_name FROM crosscalling_census_divisions
|
514
|
+
}
|
515
|
+
end
|
516
|
+
end
|
517
|
+
end
|
518
|
+
|
519
|
+
# Test model whose data_miner block imports census divisions and then
# cross-calls CrosscallingCensusRegion's own data miner.
class CrosscallingCensusDivision < ActiveRecord::Base
  set_primary_key :number

  # The association name is plural (kept as-is so existing callers are not
  # broken), so the target class cannot be inferred from it and is named
  # explicitly.
  belongs_to :crosscalling_census_regions, :class_name => 'CrosscallingCensusRegion', :foreign_key => 'census_region_number'

  data_miner do
    import "get a list of census divisions and their regions", :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
      key 'number', :field_name => 'Division'
      store 'name', :field_name => 'Name'
      store 'census_region_number', :field_name => 'Region'
      store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
    end

    process "make sure my parent object is set up (i.e., cross-call it)" do
      CrosscallingCensusRegion.run_data_miner!
    end
  end
end
|
537
|
+
|
538
|
+
# Test fixture model: one row per response in the EIA RECS 2005 public-use CSV.
# The miner imports the raw figures, zeroes out "legitimate skip" sentinel
# values, converts units to metric, and finally derives three summary columns.
class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
  set_primary_key :department_of_energy_identifier

  data_miner do
    process 'Define some unit conversions' do
      Conversions.register :kbtus, :joules, 1_000.0 * 1_055.05585
      Conversions.register :square_feet, :square_metres, 0.09290304
    end

    # Units are deliberately left untouched during the import: legitimate skips
    # have to be zeroed first, otherwise a sentinel such as 999 would be
    # converted as though it were a real measurement
    # (e.g. "999 pounds = 453.138778 kilograms").
    import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv' do
      key 'department_of_energy_identifier', :field_name => 'DOEID'

      # Coded answers, decoded through manually curated dictionaries.
      store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
      store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
      store 'construction_period', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
      store 'urbanity', :field_name => 'URBRUR', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/urbrur/urbrur.csv' }
      store 'dishwasher_use', :field_name => 'DWASHUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dwashuse/dwashuse.csv' }
      store 'central_ac_use', :field_name => 'USECENAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usecenac/usecenac.csv' }
      store 'window_ac_use', :field_name => 'USEWWAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usewwac/usewwac.csv' }
      store 'clothes_washer_use', :field_name => 'WASHLOAD', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/washload/washload.csv' }
      store 'clothes_dryer_use', :field_name => 'DRYRUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dryruse/dryruse.csv' }

      # Census geography, all looked up from the DIVISION field.
      store 'census_division_number', :field_name => 'DIVISION'
      store 'census_division_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
      store 'census_region_number', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_number', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
      store 'census_region_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }

      # Plain fields copied through as-is.
      store 'floorspace', :field_name => 'TOTSQFT'
      store 'residents', :field_name => 'NHSLDMEM'
      store 'ownership', :field_name => 'KOWNRENT'
      store 'thermostat_programmability', :field_name => 'PROTHERM'
      store 'refrigerator_count', :field_name => 'NUMFRIG'
      store 'freezer_count', :field_name => 'NUMFREEZ'
      store 'heating_degree_days', :field_name => 'HD65'
      store 'cooling_degree_days', :field_name => 'CD65'
      store 'annual_energy_from_fuel_oil_for_heating_space', :field_name => 'BTUFOSPH'
      store 'annual_energy_from_fuel_oil_for_heating_water', :field_name => 'BTUFOWTH'
      store 'annual_energy_from_fuel_oil_for_appliances', :field_name => 'BTUFOAPL'
      store 'annual_energy_from_natural_gas_for_heating_space', :field_name => 'BTUNGSPH'
      store 'annual_energy_from_natural_gas_for_heating_water', :field_name => 'BTUNGWTH'
      store 'annual_energy_from_natural_gas_for_appliances', :field_name => 'BTUNGAPL'
      store 'annual_energy_from_propane_for_heating_space', :field_name => 'BTULPSPH'
      store 'annual_energy_from_propane_for_heating_water', :field_name => 'BTULPWTH'
      store 'annual_energy_from_propane_for_appliances', :field_name => 'BTULPAPL'
      store 'annual_energy_from_wood', :field_name => 'BTUWOOD'
      store 'annual_energy_from_kerosene', :field_name => 'BTUKER'
      store 'annual_energy_from_electricity_for_clothes_driers', :field_name => 'BTUELCDR'
      store 'annual_energy_from_electricity_for_dishwashers', :field_name => 'BTUELDWH'
      store 'annual_energy_from_electricity_for_freezers', :field_name => 'BTUELFZZ'
      store 'annual_energy_from_electricity_for_refrigerators', :field_name => 'BTUELRFG'
      store 'annual_energy_from_electricity_for_air_conditioners', :field_name => 'BTUELCOL'
      store 'annual_energy_from_electricity_for_heating_space', :field_name => 'BTUELSPH'
      store 'annual_energy_from_electricity_for_heating_water', :field_name => 'BTUELWTH'
      store 'annual_energy_from_electricity_for_other_appliances', :field_name => 'BTUELAPL'
      store 'weighting', :field_name => 'NWEIGHT'
      store 'total_rooms', :field_name => 'TOTROOMS'
      store 'bathrooms', :field_name => 'NCOMBATH'
      store 'halfbaths', :field_name => 'NHAFBATH'
      store 'heated_garage', :field_name => 'GARGHEAT'
      store 'attached_1car_garage', :field_name => 'GARAGE1C'
      store 'detached_1car_garage', :field_name => 'DGARG1C'
      store 'attached_2car_garage', :field_name => 'GARAGE2C'
      store 'detached_2car_garage', :field_name => 'DGARG2C'
      store 'attached_3car_garage', :field_name => 'GARAGE3C'
      store 'detached_3car_garage', :field_name => 'DGARG3C'
      store 'lights_on_1_to_4_hours', :field_name => 'LGT1'
      store 'efficient_lights_on_1_to_4_hours', :field_name => 'LGT1EE'
      store 'lights_on_4_to_12_hours', :field_name => 'LGT4'
      store 'efficient_lights_on_4_to_12_hours', :field_name => 'LGT4EE'
      store 'lights_on_over_12_hours', :field_name => 'LGT12'
      store 'efficient_lights_on_over_12_hours', :field_name => 'LGT12EE'
      store 'outdoor_all_night_lights', :field_name => 'NOUTLGTNT'
      store 'outdoor_all_night_gas_lights', :field_name => 'NGASLIGHT'
    end

    # Continuous variables that the EIA marks as LEGITIMATE SKIPS are set to
    # zero rather than NULL, which keeps derived sums (such as the room count
    # below) computable.
    process 'Zero out what the EIA calls "LEGITIMATE SKIPS"' do
      skippable_columns = %w[
        annual_energy_from_electricity_for_air_conditioners
        annual_energy_from_electricity_for_clothes_driers
        annual_energy_from_electricity_for_dishwashers
        annual_energy_from_electricity_for_freezers
        annual_energy_from_electricity_for_heating_space
        annual_energy_from_electricity_for_heating_water
        annual_energy_from_electricity_for_other_appliances
        annual_energy_from_electricity_for_refrigerators
        annual_energy_from_fuel_oil_for_appliances
        annual_energy_from_fuel_oil_for_heating_space
        annual_energy_from_fuel_oil_for_heating_water
        annual_energy_from_kerosene
        annual_energy_from_propane_for_appliances
        annual_energy_from_propane_for_heating_space
        annual_energy_from_propane_for_heating_water
        annual_energy_from_natural_gas_for_appliances
        annual_energy_from_natural_gas_for_heating_space
        annual_energy_from_natural_gas_for_heating_water
        annual_energy_from_wood
        lights_on_1_to_4_hours
        lights_on_over_12_hours
        efficient_lights_on_over_12_hours
        efficient_lights_on_1_to_4_hours
        lights_on_4_to_12_hours
        efficient_lights_on_4_to_12_hours
        outdoor_all_night_gas_lights
        outdoor_all_night_lights
        thermostat_programmability
        detached_1car_garage
        detached_2car_garage
        detached_3car_garage
        attached_1car_garage
        attached_2car_garage
        attached_3car_garage
        heated_garage
      ]

      skippable_columns.each do |column|
        peak = maximum(column, :select => "CONVERT(#{column}, UNSIGNED INTEGER)")
        # A column whose largest value consists solely of 9s (9, 99, 999, ...)
        # is using that value as the LEGITIMATE SKIP sentinel.
        next unless peak.to_i.to_s =~ /^9+$/
        update_all "#{column} = 0", "#{column} = #{peak}"
      end
    end

    process 'Convert units to metric after zeroing out LEGITIMATE SKIPS' do
      energy_columns = %w[
        annual_energy_from_fuel_oil_for_heating_space
        annual_energy_from_fuel_oil_for_heating_water
        annual_energy_from_fuel_oil_for_appliances
        annual_energy_from_natural_gas_for_heating_space
        annual_energy_from_natural_gas_for_heating_water
        annual_energy_from_natural_gas_for_appliances
        annual_energy_from_propane_for_heating_space
        annual_energy_from_propane_for_heating_water
        annual_energy_from_propane_for_appliances
        annual_energy_from_wood
        annual_energy_from_kerosene
        annual_energy_from_electricity_for_clothes_driers
        annual_energy_from_electricity_for_dishwashers
        annual_energy_from_electricity_for_freezers
        annual_energy_from_electricity_for_refrigerators
        annual_energy_from_electricity_for_air_conditioners
        annual_energy_from_electricity_for_heating_space
        annual_energy_from_electricity_for_heating_water
        annual_energy_from_electricity_for_other_appliances
      ]

      # Floorspace first, then every energy column, each as
      # [column, from_units, to_units].
      unit_changes = [['floorspace', :square_feet, :square_metres]]
      unit_changes += energy_columns.map { |column| [column, :kbtus, :joules] }

      unit_changes.each do |column, from_units, to_units|
        rate = Conversions::Unit.exchange_rate(from_units, to_units)
        update_all "#{column} = #{column} * #{rate}"
      end
    end

    process 'Add a new field "rooms" that estimates how many rooms are in the house' do
      update_all 'rooms = total_rooms + bathrooms/2 + halfbaths/4 + heated_garage*(attached_1car_garage + detached_1car_garage + 2*(attached_2car_garage + detached_2car_garage) + 3*(attached_3car_garage + detached_3car_garage))'
    end

    process 'Add a new field "lighting_use" that estimates how many hours light bulbs are turned on in the house' do
      update_all 'lighting_use = 2*(lights_on_1_to_4_hours + efficient_lights_on_1_to_4_hours) + 8*(lights_on_4_to_12_hours + efficient_lights_on_4_to_12_hours) + 16*(lights_on_over_12_hours + efficient_lights_on_over_12_hours) + 12*(outdoor_all_night_lights + outdoor_all_night_gas_lights)'
    end

    # Depends on lighting_use, so it has to run after the step above.
    process 'Add a new field "lighting_efficiency" that estimates what percentage of light bulbs in a house are energy-efficient' do
      update_all 'lighting_efficiency = (2*efficient_lights_on_1_to_4_hours + 8*efficient_lights_on_4_to_12_hours + 16*efficient_lights_on_over_12_hours) / lighting_use'
    end
  end
end
|
703
|
+
|
704
|
+
# T-100 Segment (All Carriers): http://www.transtats.bts.gov/Fields.asp?Table_ID=293
|
705
|
+
class T100FlightSegment < ActiveRecord::Base
|
706
|
+
set_primary_key :row_hash
|
707
|
+
URL = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
|
708
|
+
FORM_DATA = %{
|
709
|
+
UserTableName=T_100_Segment__All_Carriers&
|
710
|
+
DBShortName=Air_Carriers&
|
711
|
+
RawDataTable=T_T100_SEGMENT_ALL_CARRIER&
|
712
|
+
sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D__MONTH_NUMBER__+AND+YEAR%3D__YEAR__&
|
713
|
+
varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&
|
714
|
+
grouplist=&
|
715
|
+
suml=&
|
716
|
+
sumRegion=&
|
717
|
+
filter1=title%3D&
|
718
|
+
filter2=title%3D&
|
719
|
+
geo=All%A0&
|
720
|
+
time=__MONTH_NAME__&
|
721
|
+
timename=Month&
|
722
|
+
GEOGRAPHY=All&
|
723
|
+
XYEAR=__YEAR__&
|
724
|
+
FREQUENCY=__MONTH_NUMBER__&
|
725
|
+
AllVars=All&
|
726
|
+
VarName=DEPARTURES_SCHEDULED&
|
727
|
+
VarDesc=DepScheduled&
|
728
|
+
VarType=Num&
|
729
|
+
VarName=DEPARTURES_PERFORMED&
|
730
|
+
VarDesc=DepPerformed&
|
731
|
+
VarType=Num&
|
732
|
+
VarName=PAYLOAD&
|
733
|
+
VarDesc=Payload&
|
734
|
+
VarType=Num&
|
735
|
+
VarName=SEATS&
|
736
|
+
VarDesc=Seats&
|
737
|
+
VarType=Num&
|
738
|
+
VarName=PASSENGERS&
|
739
|
+
VarDesc=Passengers&
|
740
|
+
VarType=Num&
|
741
|
+
VarName=FREIGHT&
|
742
|
+
VarDesc=Freight&
|
743
|
+
VarType=Num&
|
744
|
+
VarName=MAIL&
|
745
|
+
VarDesc=Mail&
|
746
|
+
VarType=Num&
|
747
|
+
VarName=DISTANCE&
|
748
|
+
VarDesc=Distance&
|
749
|
+
VarType=Num&
|
750
|
+
VarName=RAMP_TO_RAMP&
|
751
|
+
VarDesc=RampToRamp&
|
752
|
+
VarType=Num&
|
753
|
+
VarName=AIR_TIME&
|
754
|
+
VarDesc=AirTime&
|
755
|
+
VarType=Num&
|
756
|
+
VarName=UNIQUE_CARRIER&
|
757
|
+
VarDesc=UniqueCarrier&
|
758
|
+
VarType=Char&
|
759
|
+
VarName=AIRLINE_ID&
|
760
|
+
VarDesc=AirlineID&
|
761
|
+
VarType=Num&
|
762
|
+
VarName=UNIQUE_CARRIER_NAME&
|
763
|
+
VarDesc=UniqueCarrierName&
|
764
|
+
VarType=Char&
|
765
|
+
VarName=UNIQUE_CARRIER_ENTITY&
|
766
|
+
VarDesc=UniqCarrierEntity&
|
767
|
+
VarType=Char&
|
768
|
+
VarName=REGION&
|
769
|
+
VarDesc=CarrierRegion&
|
770
|
+
VarType=Char&
|
771
|
+
VarName=CARRIER&
|
772
|
+
VarDesc=Carrier&
|
773
|
+
VarType=Char&
|
774
|
+
VarName=CARRIER_NAME&
|
775
|
+
VarDesc=CarrierName&
|
776
|
+
VarType=Char&
|
777
|
+
VarName=CARRIER_GROUP&
|
778
|
+
VarDesc=CarrierGroup&
|
779
|
+
VarType=Num&
|
780
|
+
VarName=CARRIER_GROUP_NEW&
|
781
|
+
VarDesc=CarrierGroupNew&
|
782
|
+
VarType=Num&
|
783
|
+
VarName=ORIGIN&
|
784
|
+
VarDesc=Origin&
|
785
|
+
VarType=Char&
|
786
|
+
VarName=ORIGIN_CITY_NAME&
|
787
|
+
VarDesc=OriginCityName&
|
788
|
+
VarType=Char&
|
789
|
+
VarName=ORIGIN_CITY_NUM&
|
790
|
+
VarDesc=OriginCityNum&
|
791
|
+
VarType=Num&
|
792
|
+
VarName=ORIGIN_STATE_ABR&
|
793
|
+
VarDesc=OriginState&
|
794
|
+
VarType=Char&
|
795
|
+
VarName=ORIGIN_STATE_FIPS&
|
796
|
+
VarDesc=OriginStateFips&
|
797
|
+
VarType=Char&
|
798
|
+
VarName=ORIGIN_STATE_NM&
|
799
|
+
VarDesc=OriginStateName&
|
800
|
+
VarType=Char&
|
801
|
+
VarName=ORIGIN_COUNTRY&
|
802
|
+
VarDesc=OriginCountry&
|
803
|
+
VarType=Char&
|
804
|
+
VarName=ORIGIN_COUNTRY_NAME&
|
805
|
+
VarDesc=OriginCountryName&
|
806
|
+
VarType=Char&
|
807
|
+
VarName=ORIGIN_WAC&
|
808
|
+
VarDesc=OriginWac&
|
809
|
+
VarType=Num&
|
810
|
+
VarName=DEST&
|
811
|
+
VarDesc=Dest&
|
812
|
+
VarType=Char&
|
813
|
+
VarName=DEST_CITY_NAME&
|
814
|
+
VarDesc=DestCityName&
|
815
|
+
VarType=Char&
|
816
|
+
VarName=DEST_CITY_NUM&
|
817
|
+
VarDesc=DestCityNum&
|
818
|
+
VarType=Num&
|
819
|
+
VarName=DEST_STATE_ABR&
|
820
|
+
VarDesc=DestState&
|
821
|
+
VarType=Char&
|
822
|
+
VarName=DEST_STATE_FIPS&
|
823
|
+
VarDesc=DestStateFips&
|
824
|
+
VarType=Char&
|
825
|
+
VarName=DEST_STATE_NM&
|
826
|
+
VarDesc=DestStateName&
|
827
|
+
VarType=Char&
|
828
|
+
VarName=DEST_COUNTRY&
|
829
|
+
VarDesc=DestCountry&
|
830
|
+
VarType=Char&
|
831
|
+
VarName=DEST_COUNTRY_NAME&
|
832
|
+
VarDesc=DestCountryName&
|
833
|
+
VarType=Char&
|
834
|
+
VarName=DEST_WAC&
|
835
|
+
VarDesc=DestWac&
|
836
|
+
VarType=Num&
|
837
|
+
VarName=AIRCRAFT_GROUP&
|
838
|
+
VarDesc=AircraftGroup&
|
839
|
+
VarType=Num&
|
840
|
+
VarName=AIRCRAFT_TYPE&
|
841
|
+
VarDesc=AircraftType&
|
842
|
+
VarType=Char&
|
843
|
+
VarName=AIRCRAFT_CONFIG&
|
844
|
+
VarDesc=AircraftConfig&
|
845
|
+
VarType=Num&
|
846
|
+
VarName=YEAR&
|
847
|
+
VarDesc=Year&
|
848
|
+
VarType=Num&
|
849
|
+
VarName=QUARTER&
|
850
|
+
VarDesc=Quarter&
|
851
|
+
VarType=Num&
|
852
|
+
VarName=MONTH&
|
853
|
+
VarDesc=Month&
|
854
|
+
VarType=Num&
|
855
|
+
VarName=DISTANCE_GROUP&
|
856
|
+
VarDesc=DistanceGroup&
|
857
|
+
VarType=Num&
|
858
|
+
VarName=CLASS&
|
859
|
+
VarDesc=Class&
|
860
|
+
VarType=Char&
|
861
|
+
VarName=DATA_SOURCE&
|
862
|
+
VarDesc=DataSource&
|
863
|
+
VarType=Char
|
864
|
+
}.gsub /[\s]+/,''
|
865
|
+
|
866
|
+
data_miner do
  # Build one POST body per month to download: FORM_DATA is a template whose
  # __YEAR__, __MONTH_NUMBER__ and __MONTH_NAME__ placeholders are filled in
  # here. The ranges are narrowed to January 2008 to keep the test small; the
  # commented-out ranges show the wider intended span.
  months = {}
  # (2008..2009).each do |year|
  (2008..2008).each do |year|
    # (1..12).each do |month|
    (1..1).each do |month|
      time = Time.gm year, month
      form_data = FORM_DATA.dup # never mutate the shared template
      form_data.gsub! '__YEAR__', time.year.to_s
      form_data.gsub! '__MONTH_NUMBER__', time.month.to_s
      form_data.gsub! '__MONTH_NAME__', time.strftime('%B')
      months[time] = form_data
    end
  end

  # One import step per month, each posting its own form data to the same URL.
  months.each do |month, form_data|
    import "T100 data from #{month.strftime('%B %Y')}",
           :url => URL,
           :form_data => form_data,
           :compression => :zip,
           :glob => '/*.csv' do
      key 'row_hash'
      store 'departures_scheduled', :field_name => 'DEPARTURES_SCHEDULED'
      store 'departures_performed', :field_name => 'DEPARTURES_PERFORMED'
      store 'payload', :field_name => 'PAYLOAD', :from_units => :pounds, :to_units => :kilograms
      store 'seats', :field_name => 'SEATS'
      store 'passengers', :field_name => 'PASSENGERS'
      store 'freight', :field_name => 'FREIGHT', :from_units => :pounds, :to_units => :kilograms
      store 'mail', :field_name => 'MAIL', :from_units => :pounds, :to_units => :kilograms
      store 'distance', :field_name => 'DISTANCE', :from_units => :miles, :to_units => :kilometres
      store 'ramp_to_ramp', :field_name => 'RAMP_TO_RAMP'
      store 'air_time', :field_name => 'AIR_TIME'
      store 'unique_carrier', :field_name => 'UNIQUE_CARRIER'
      store 'dot_airline_id', :field_name => 'AIRLINE_ID'
      store 'unique_carrier_name', :field_name => 'UNIQUE_CARRIER_NAME'
      store 'unique_carrier_entity', :field_name => 'UNIQUE_CARRIER_ENTITY'
      store 'region', :field_name => 'REGION'
      store 'carrier', :field_name => 'CARRIER'
      store 'carrier_name', :field_name => 'CARRIER_NAME'
      store 'carrier_group', :field_name => 'CARRIER_GROUP'
      store 'carrier_group_new', :field_name => 'CARRIER_GROUP_NEW'
      store 'origin_airport_iata', :field_name => 'ORIGIN'
      store 'origin_city_name', :field_name => 'ORIGIN_CITY_NAME'
      store 'origin_city_num', :field_name => 'ORIGIN_CITY_NUM'
      store 'origin_state_abr', :field_name => 'ORIGIN_STATE_ABR'
      store 'origin_state_fips', :field_name => 'ORIGIN_STATE_FIPS'
      store 'origin_state_nm', :field_name => 'ORIGIN_STATE_NM'
      store 'origin_country_iso_3166', :field_name => 'ORIGIN_COUNTRY'
      store 'origin_country_name', :field_name => 'ORIGIN_COUNTRY_NAME'
      store 'origin_wac', :field_name => 'ORIGIN_WAC'
      store 'dest_airport_iata', :field_name => 'DEST'
      store 'dest_city_name', :field_name => 'DEST_CITY_NAME'
      store 'dest_city_num', :field_name => 'DEST_CITY_NUM'
      store 'dest_state_abr', :field_name => 'DEST_STATE_ABR'
      store 'dest_state_fips', :field_name => 'DEST_STATE_FIPS'
      store 'dest_state_nm', :field_name => 'DEST_STATE_NM'
      store 'dest_country_iso_3166', :field_name => 'DEST_COUNTRY'
      store 'dest_country_name', :field_name => 'DEST_COUNTRY_NAME'
      store 'dest_wac', :field_name => 'DEST_WAC'
      store 'bts_aircraft_group', :field_name => 'AIRCRAFT_GROUP'
      store 'bts_aircraft_type', :field_name => 'AIRCRAFT_TYPE'
      store 'bts_aircraft_config', :field_name => 'AIRCRAFT_CONFIG'
      store 'year', :field_name => 'YEAR'
      store 'quarter', :field_name => 'QUARTER'
      store 'month', :field_name => 'MONTH'
      store 'bts_distance_group', :field_name => 'DISTANCE_GROUP'
      store 'bts_service_class', :field_name => 'CLASS'
      store 'data_source', :field_name => 'DATA_SOURCE'
    end
  end

  # Each derived ratio is only written where its denominator is positive.
  process 'Derive freight share as a fraction of payload' do
    update_all 'freight_share = (freight + mail) / payload', 'payload > 0'
  end

  process 'Derive load factor, which is passengers divided by the total seats available' do
    # "passengers <= seats" alone still matches rows with seats = 0 (and
    # passengers = 0), which would divide by zero; require seats > 0 as well.
    update_all 'load_factor = passengers / seats', 'seats > 0 AND passengers <= seats'
  end

  process 'Derive average seats per departure' do
    update_all 'seats_per_departure = seats / departures_performed', 'departures_performed > 0'
  end
end
|
948
|
+
end
|
949
|
+
|
950
|
+
require 'loose_tight_dictionary'
|
951
|
+
# Test fixture model: aircraft keyed by ICAO designator, imported from the
# FAA's HTML tables and enriched with BTS names and type codes that are
# fuzzy-matched through a LooseTightDictionary.
class Aircraft < ActiveRecord::Base
  set_primary_key :icao_code

  # Memoised dictionary that matches imported (left) records against the BTS
  # aircraft-type table (right records). Left records are read as
  # "Manufacturer Model", right records as "Manufacturer Long Name".
  def self.bts_dictionary
    @_dictionary ||= LooseTightDictionary.new(
      # Right-hand records: rows with a type code from 1 to 998 and a
      # manufacturer.
      RemoteTable.new(:url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv', :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) && record['Manufacturer'].present? }),
      :tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
      :identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
      :blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
      :left_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Model'] },
      :right_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
    )
  end

  # Matcher handed to `store`: returns the BTS "Aircraft Type" of the record
  # the dictionary pairs with left_record, or nil when nothing matches.
  class BtsAircraftTypeCodeMatcher
    def match(left_record)
      right_record = Aircraft.bts_dictionary.left_to_right left_record
      right_record['Aircraft Type'] if right_record
    end
  end

  # Matcher handed to `store`: returns "Manufacturer Long Name" of the matched
  # BTS record, or nil when nothing matches.
  class BtsNameMatcher
    def match(left_record)
      right_record = Aircraft.bts_dictionary.left_to_right left_record
      right_record['Manufacturer'] + ' ' + right_record['Long Name'] if right_record
    end
  end

  # Errata responder: the predicates here are passed to Errata as :responder
  # and each receives one source row. A missing 'Manufacturer' or 'Model'
  # counts as "not attributed".
  class Guru
    # for errata
    def is_attributed_to_boeing?(row)
      row['Manufacturer'] =~ /BOEING/i
    end

    def is_attributed_to_cessna?(row)
      row['Manufacturer'] =~ /CESSNA/i
    end

    def is_attributed_to_fokker?(row)
      row['Manufacturer'] =~ /FOKKER/i
    end

    def is_not_attributed_to_aerospatiale?(row)
      row['Manufacturer'] !~ /AEROSPATIALE/i
    end

    def is_not_attributed_to_cessna?(row)
      row['Manufacturer'] !~ /CESSNA/i
    end

    def is_not_attributed_to_learjet?(row)
      row['Manufacturer'] !~ /LEAR/i
    end

    def is_not_attributed_to_dehavilland?(row)
      row['Manufacturer'] !~ /DE ?HAVILLAND/i
    end

    def is_not_attributed_to_mcdonnell_douglas?(row)
      row['Manufacturer'] !~ /MCDONNELL DOUGLAS/i
    end

    def is_not_a_dc_plane?(row)
      row['Model'] !~ /DC/i
    end

    # to_s keeps a row without a 'Designator' from raising NoMethodError;
    # such a row is simply not a CRJ-900.
    def is_a_crj_900?(row)
      row['Designator'].to_s.downcase == 'crj9'
    end
  end

  data_miner do
    # ('A'..'Z').each do |letter|
    # Note: for the purposes of testing, only importing "D"
    %w{ D }.each do |letter|
      import("ICAO codes starting with letter #{letter} used by the FAA",
             :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
             :encoding => 'US-ASCII',
             :errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
                                   :responder => Aircraft::Guru.new),
             :row_xpath => '//table/tr[2]/td/table/tr',
             :column_xpath => 'td') do
        key 'icao_code', :field_name => 'Designator'
        store 'bts_name', :matcher => Aircraft::BtsNameMatcher.new
        store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher.new
        store 'manufacturer_name', :field_name => 'Manufacturer'
        store 'name', :field_name => 'Model'
      end

      # Keyed on the BTS type code stored by the import above.
      import 'Brighter Planet aircraft class codes',
             :url => 'http://static.brighterplanet.com/science/data/transport/air/bts_aircraft_type/bts_aircraft_types-brighter_planet_aircraft_classes.csv' do
        key 'bts_aircraft_type_code', :field_name => 'bts_aircraft_type'
        store 'brighter_planet_aircraft_class_code'
      end
    end
  end
end
|
1046
|
+
|
1047
|
+
# NOTE: AircraftDeux reuses the matcher classes defined inside Aircraft (Aircraft::BtsNameMatcher and Aircraft::BtsAircraftTypeCodeMatcher)
|
1048
|
+
# Test fixture model: a variant of the Aircraft import in which :errata is
# given as a bare URL instead of an Errata object. It reuses
# Aircraft::BtsNameMatcher and Aircraft::BtsAircraftTypeCodeMatcher.
class AircraftDeux < ActiveRecord::Base
  set_primary_key :icao_code

  # defined on the class because we defined the errata with a shorthand
  class << self
    # Each predicate receives one source row. A missing 'Manufacturer' or
    # 'Model' counts as "not attributed".
    def is_not_attributed_to_aerospatiale?(row)
      row['Manufacturer'] !~ /AEROSPATIALE/i
    end

    def is_not_attributed_to_cessna?(row)
      row['Manufacturer'] !~ /CESSNA/i
    end

    def is_not_attributed_to_learjet?(row)
      row['Manufacturer'] !~ /LEAR/i
    end

    def is_not_attributed_to_dehavilland?(row)
      row['Manufacturer'] !~ /DE ?HAVILLAND/i
    end

    def is_not_attributed_to_mcdonnell_douglas?(row)
      row['Manufacturer'] !~ /MCDONNELL DOUGLAS/i
    end

    def is_not_a_dc_plane?(row)
      row['Model'] !~ /DC/i
    end

    # to_s keeps a row without a 'Designator' from raising NoMethodError;
    # such a row is simply not a CRJ-900.
    def is_a_crj_900?(row)
      row['Designator'].to_s.downcase == 'crj9'
    end
  end

  data_miner do
    # ('A'..'Z').each do |letter|
    # Note: for the purposes of testing, only importing "D"
    %w{ D }.each do |letter|
      import("ICAO codes starting with letter #{letter} used by the FAA",
             :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
             :encoding => 'windows-1252',
             :errata => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
             :row_xpath => '//table/tr[2]/td/table/tr',
             :column_xpath => 'td') do
        key 'icao_code', :field_name => 'Designator'
        store 'bts_name', :matcher => Aircraft::BtsNameMatcher.new
        store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher.new
        store 'manufacturer_name', :field_name => 'Manufacturer'
        store 'name', :field_name => 'Model'
      end
    end
  end
end
|
1101
|
+
|
1102
|
+
# Test fixture model: fuel efficiency and sales volume per automobile make,
# fleet and year. Its miner declares the table schema inline and contains two
# steps that let a test abort the run through the $force_finish and
# $force_skip globals.
class AutomobileMakeFleetYear < ActiveRecord::Base
  set_primary_key :name

  data_miner do
    # No surrogate id column; `name` is the primary key.
    schema :id => false do
      string   "name"
      string   "make_name"
      string   "fleet"
      integer  "year"
      float    "fuel_efficiency"
      string   "fuel_efficiency_units"
      integer  "volume"
      string   "make_year_name"
      datetime "created_at"
      datetime "updated_at"
    end

    process "finish if i tell you to" do
      if $force_finish
        raise DataMiner::Finish
      end
    end

    process "skip if i tell you to" do
      if $force_skip
        raise DataMiner::Skip
      end
    end

    # CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA
    import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/make_fleet_years.csv',
           :errata => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv',
           :select => lambda { |row| row['volume'].to_i > 0 } do
      # The key is "<manufacturer> <fleet code> <year>", where the fleet code
      # is the two characters of `fleet` starting at index 2 -- the same
      # characters the 'fleet' column stores below.
      key 'name', :synthesize => lambda { |row|
        maker      = row['manufacturer_name']
        fleet_code = row['fleet'][2, 2]
        model_year = row['year_content']
        [maker, fleet_code, model_year].join(' ')
      }
      store 'make_name', :field_name => 'manufacturer_name'
      store 'year', :field_name => 'year_content'
      store 'fleet', :chars => 2..3 # zero-based
      store 'fuel_efficiency', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
      store 'volume'
    end
  end
end
|
1140
|
+
|
1141
|
+
# Test fixture model: a schema-only miner (no import or process steps) that
# passes table options and declares both a named single-column index and a
# multi-column index.
class CensusDivisionTrois < ActiveRecord::Base
  set_primary_key :number_code

  data_miner do
    schema :options => 'ENGINE=InnoDB default charset=utf8' do
      string  'number_code'
      string  'name'
      string  'census_region_name'
      integer 'census_region_number'

      # Single-column index with an explicit name.
      index 'census_region_name', :name => 'homefry'
      # Composite index; no :name is given for this one.
      index %w[number_code name census_region_name census_region_number updated_at created_at]
    end
  end
end
|
1154
|
+
|
1155
|
+
# Test fixture model that keeps ActiveRecord's default primary key. Its
# data_miner configuration contains only a schema step with no table options.
class CensusDivisionFour < ActiveRecord::Base
  data_miner do
    schema do
      string('number_code')
      string('name')
      string('census_region_name')
      integer('census_region_number')

      # Single-column index with an explicit name; the tests look it up by this name.
      index('census_region_name', :name => 'homefry')
    end
  end
end
|
1166
|
+
|
1167
|
+
# TODO: have somebody properly organize these tests
|
1168
|
+
# Integration tests for DataMiner, run against the fixture models declared
# earlier in this file.
#
# Test groups are opt-in through environment variables: ALL=true enables every
# group, while NEW=true, FAST=true and SLOW=true each enable a single group.
class DataMinerTest < Test::Unit::TestCase
  if ENV['ALL'] == 'true' || ENV['NEW'] == 'true'
    should 'directly create a table for the model' do
      # Start from a clean slate so execute_schema has to create the table.
      if AutomobileMakeFleetYear.table_exists?
        ActiveRecord::Base.connection.execute 'DROP TABLE automobile_make_fleet_years;'
      end
      AutomobileMakeFleetYear.execute_schema
      assert AutomobileMakeFleetYear.table_exists?
    end
  end

  if ENV['ALL'] == 'true' || ENV['FAST'] == 'true'
    should 'override an existing data_miner configuration' do
      AutomobileFuelType.class_eval do
        data_miner do
          import 'example', :url => 'http://example.com' do
            key 'code'
            store 'name'
          end
        end
      end
      assert_kind_of DataMiner::Import, AutomobileFuelType.data_miner_base.steps.first
      assert_equal 'http://example.com', AutomobileFuelType.data_miner_base.steps.first.table.package.url
      assert_equal 1, AutomobileFuelType.data_miner_base.step_counter
    end

    should "stop and finish if it gets a DataMiner::Finish" do
      AutomobileMakeFleetYear.delete_all
      AutomobileMakeFleetYear.data_miner_runs.delete_all
      begin
        $force_finish = true
        AutomobileMakeFleetYear.run_data_miner!
        assert_equal 0, AutomobileMakeFleetYear.count
        assert_equal true, (AutomobileMakeFleetYear.data_miner_runs.count > 0)
        assert_equal true, AutomobileMakeFleetYear.data_miner_runs.all? { |run| run.finished? && !run.skipped? && !run.killed? }
      ensure
        # Always clear the global, even when an assertion above fails, so the
        # flag cannot leak into later runs of this model.
        $force_finish = false
      end
      AutomobileMakeFleetYear.run_data_miner!
      assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
    end

    should "stop and register skipped if it gets a DataMiner::Skip" do
      AutomobileMakeFleetYear.delete_all
      AutomobileMakeFleetYear.data_miner_runs.delete_all
      begin
        $force_skip = true
        AutomobileMakeFleetYear.run_data_miner!
        assert_equal 0, AutomobileMakeFleetYear.count
        assert_equal true, (AutomobileMakeFleetYear.data_miner_runs.count > 0)
        assert_equal true, AutomobileMakeFleetYear.data_miner_runs.all? { |run| run.skipped? && !run.finished? && !run.killed? }
      ensure
        # Always clear the global, even when an assertion above fails, so the
        # flag cannot leak into later runs of this model.
        $force_skip = false
      end
      AutomobileMakeFleetYear.run_data_miner!
      assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
    end

    should "eagerly enforce a schema" do
      assert_schema_enforced CensusDivisionTrois, 'census_division_trois', :string
    end

    should "let schemas work with default id primary keys" do
      assert_schema_enforced CensusDivisionFour, 'census_division_fours', :integer
    end

    should "allow specifying dictionaries explicitly" do
      CensusDivisionDeux.run_data_miner!
      assert_equal 'South Region', CensusDivisionDeux.find(5).census_region_name
    end

    should "be able to key on things other than the primary key" do
      Aircraft.run_data_miner!
      assert_equal 'SP', Aircraft.find('DHC6').brighter_planet_aircraft_class_code
    end

    should "be able to synthesize rows without using a full parser class" do
      AutomobileMakeFleetYear.run_data_miner!
      assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
    end

    should "keep a call stack so that you can call run_data_miner! on a child" do
      CrosscallingCensusDivision.run_data_miner!
      assert CrosscallingCensusDivision.exists?(:name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region')
      assert CrosscallingCensusRegion.exists?(:name => 'West Region', :number => 4)
    end

    should "keep a call stack so that you can call run_data_miner! on a parent" do
      CrosscallingCensusRegion.run_data_miner!
      assert CrosscallingCensusDivision.exists?(:name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region')
      assert CrosscallingCensusRegion.exists?(:name => 'West Region', :number => 4)
    end

    should "import airports" do
      Airport.run_data_miner!
      assert Airport.count > 0
    end

    should "tap airports" do
      TappedAirport.run_data_miner!
      assert TappedAirport.count > 0
    end

    should "pull in census divisions using a data.brighterplanet.com dictionary" do
      CensusDivision.run_data_miner!
      assert CensusDivision.count > 0
    end

    should "have a way to queue up runs that works with delayed_job's send_later" do
      assert AutomobileVariant.respond_to?(:run_data_miner!)
    end

    should "be idempotent" do
      # Running the same miner twice must not change the row count.
      Country.data_miner_base.run
      a = Country.count
      Country.data_miner_base.run
      b = Country.count
      assert_equal a, b

      CensusRegion.data_miner_base.run
      a = CensusRegion.count
      CensusRegion.data_miner_base.run
      b = CensusRegion.count
      assert_equal a, b
    end

    should "hash things" do
      AutomobileVariant.data_miner_base.steps[0].run(nil)
      assert AutomobileVariant.first.row_hash.present?
    end

    should "process a callback block instead of a method" do
      AutomobileVariant.delete_all
      AutomobileVariant.data_miner_base.steps[0].run(nil)
      assert !AutomobileVariant.first.fuel_efficiency_city.present?
      AutomobileVariant.data_miner_base.steps.last.run(nil)
      assert AutomobileVariant.first.fuel_efficiency_city.present?
    end

    should "keep a log when it does a run" do
      approx_started_at = Time.now
      DataMiner.run :resource_names => %w{ Country }
      approx_terminated_at = Time.now
      last_run = DataMiner::Run.first(:conditions => { :resource_name => 'Country' }, :order => 'id DESC')
      # Explicit outer parentheses: `assert (x).abs < 5` is ambiguous to the parser.
      assert((last_run.started_at - approx_started_at).abs < 5) # seconds
      assert((last_run.terminated_at - approx_terminated_at).abs < 5) # seconds
    end

    should "request a re-import from scratch" do
      c = Country.new
      c.iso_3166 = 'JUNK'
      c.save!
      assert Country.exists?(:iso_3166 => 'JUNK')
      DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
      assert !Country.exists?(:iso_3166 => 'JUNK')
    end

    should "know what runs were on a resource" do
      DataMiner.run :resource_names => %w{ Country }
      DataMiner.run :resource_names => %w{ Country }
      assert Country.data_miner_runs.count > 0
    end
  end

  if ENV['ALL'] == 'true' || ENV['SLOW'] == 'true'
    should "allow errata to be specified with a shorthand, assuming the responder is the resource class itself" do
      AircraftDeux.run_data_miner!
      assert AircraftDeux.exists?(:icao_code => 'DC91', :bts_aircraft_type_code => '630')
    end

    should "mine aircraft" do
      Aircraft.run_data_miner!
      assert Aircraft.exists?(:icao_code => 'DC91', :bts_aircraft_type_code => '630')
    end

    should "mine automobile variants" do
      AutomobileVariant.run_data_miner!
      # The filter must go in :conditions; a bare string argument to count is
      # treated as the column expression to count, not as a WHERE clause.
      assert(AutomobileVariant.count(:conditions => [ 'make_name LIKE ?', '%tesla' ]) > 0)
    end

    should "mine T100 flight segments" do
      T100FlightSegment.run_data_miner!
      # See the note on count/:conditions in "mine automobile variants".
      assert(T100FlightSegment.count(:conditions => [ 'dest_country_name LIKE ?', '%United States' ]) > 0)
    end

    should "mine residence survey responses" do
      ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
      assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.start_with?('Single-family detached house')
    end
  end

  private

  # Shared body of the two schema-enforcement tests.
  #
  # Force-creates +table_name+ with only the +name+ and +census_region_name+
  # columns plus an unnamed index, checks that the remaining columns and the
  # 'homefry' index are absent, then runs +model+'s miner three times. After
  # every run it asserts that the missing columns and the 'homefry' index now
  # exist and that the primary key column has type +primary_key_type+.
  #
  # model            - ActiveRecord class whose data_miner block declares a schema
  # table_name       - String name of the table backing +model+
  # primary_key_type - Symbol column type expected for the model's primary key
  def assert_schema_enforced(model, table_name, primary_key_type)
    ActiveRecord::Base.connection.create_table table_name, :force => true, :options => 'ENGINE=InnoDB default charset=utf8' do |t|
      t.string 'name'
      # t.datetime 'updated_at'
      # t.datetime 'created_at'
      t.string 'census_region_name'
      # t.integer 'census_region_number'
    end
    ActiveRecord::Base.connection.execute "ALTER TABLE #{table_name} ADD INDEX (census_region_name)"
    model.reset_column_information
    missing_columns = %w{ updated_at created_at census_region_number }

    # sanity check
    missing_columns.each do |column|
      assert_equal false, model.column_names.include?(column)
    end
    assert_equal false, ActiveRecord::Base.connection.indexes(model.table_name).any? { |index| index.name == 'homefry' }

    # Repeat to make sure enforcing the schema on an already-correct table still passes.
    3.times do
      model.run_data_miner!
      missing_columns.each do |column|
        assert_equal true, model.column_names.include?(column)
      end
      assert_equal true, ActiveRecord::Base.connection.indexes(model.table_name).any? { |index| index.name == 'homefry' }
      assert_equal primary_key_type, model.columns_hash[model.primary_key].type
    end
  end
end
|