activewarehouse-etl 0.8.4 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +98 -62
- data/Rakefile +11 -0
- data/TODO +2 -1
- data/lib/etl.rb +9 -0
- data/lib/etl/batch.rb +2 -0
- data/lib/etl/batch/batch.rb +111 -0
- data/lib/etl/batch/directives.rb +55 -0
- data/lib/etl/builder.rb +1 -0
- data/lib/etl/builder/date_dimension_builder.rb +83 -0
- data/lib/etl/commands/etl.rb +56 -43
- data/lib/etl/control/control.rb +58 -9
- data/lib/etl/control/destination.rb +29 -4
- data/lib/etl/control/destination/database_destination.rb +17 -27
- data/lib/etl/control/source/database_source.rb +17 -40
- data/lib/etl/control/source/file_source.rb +8 -5
- data/lib/etl/control/source/model_source.rb +39 -0
- data/lib/etl/core_ext.rb +1 -0
- data/lib/etl/core_ext/time.rb +5 -0
- data/lib/etl/core_ext/time/calculations.rb +40 -0
- data/lib/etl/engine.rb +184 -83
- data/lib/etl/execution.rb +1 -0
- data/lib/etl/execution/base.rb +1 -1
- data/lib/etl/execution/batch.rb +8 -0
- data/lib/etl/execution/job.rb +1 -0
- data/lib/etl/execution/migration.rb +16 -4
- data/lib/etl/generator/surrogate_key_generator.rb +20 -4
- data/lib/etl/http_tools.rb +1 -1
- data/lib/etl/processor/bulk_import_processor.rb +16 -19
- data/lib/etl/processor/check_exist_processor.rb +16 -7
- data/lib/etl/processor/hierarchy_exploder_processor.rb +2 -1
- data/lib/etl/processor/require_non_blank_processor.rb +26 -0
- data/lib/etl/processor/surrogate_key_processor.rb +22 -2
- data/lib/etl/processor/truncate_processor.rb +13 -13
- data/lib/etl/screen.rb +14 -0
- data/lib/etl/screen/row_count_screen.rb +7 -2
- data/lib/etl/transform/foreign_key_lookup_transform.rb +15 -5
- data/lib/etl/transform/hierarchy_lookup_transform.rb +7 -14
- data/lib/etl/util.rb +59 -0
- data/lib/etl/version.rb +2 -2
- metadata +19 -2
@@ -8,6 +8,9 @@ module ETL #:nodoc:
|
|
8
8
|
module Control #:nodoc:
|
9
9
|
# Source object which extracts data from a database using ActiveRecord.
|
10
10
|
class DatabaseSource < Source
|
11
|
+
attr_accessor :target
|
12
|
+
attr_accessor :table
|
13
|
+
|
11
14
|
# Initialize the source.
|
12
15
|
#
|
13
16
|
# Arguments:
|
@@ -16,38 +19,39 @@ module ETL #:nodoc:
|
|
16
19
|
# * <tt>definition</tt>: The source definition
|
17
20
|
#
|
18
21
|
# Required configuration options:
|
22
|
+
# * <tt>:target</tt>: The target connection
|
19
23
|
# * <tt>:table</tt>: The source table name
|
20
24
|
# * <tt>:database</tt>: The database name
|
21
25
|
#
|
22
26
|
# Other options:
|
23
|
-
# * <tt>:adapter</tt>: The adapter to use (defaults to :mysql)
|
24
|
-
# * <tt>:username</tt>: The database username (defaults to 'root')
|
25
|
-
# * <tt>:password</tt>: The password to the database (defaults to
|
26
|
-
# nothing)
|
27
|
-
# * <tt>:host</tt>: The host for the database (defaults to
|
28
|
-
# 'localhost')
|
29
27
|
# * <tt>:join</tt>: Optional join part for the query (ignored unless
|
30
28
|
# specified)
|
31
29
|
# * <tt>:select</tt>: Optional select part for the query (defaults to
|
32
30
|
# '*')
|
31
|
+
# * <tt>:group</tt>: Optional group by part for the query (ignored
|
32
|
+
# unless specified)
|
33
33
|
# * <tt>:order</tt>: Optional order part for the query (ignored unless
|
34
34
|
# specified)
|
35
|
+
# * <tt>:new_records_only</tt>: Specify the column to use when comparing
|
36
|
+
# timestamps against the last successful ETL job execution for the
|
37
|
+
# current control file.
|
35
38
|
# * <tt>:store_locally</tt>: Set to false to not store a copy of the
|
36
39
|
# source data locally in a flat file (defaults to true)
|
37
40
|
def initialize(control, configuration, definition)
|
38
41
|
super
|
39
|
-
|
42
|
+
@target = configuration[:target]
|
43
|
+
@table = configuration[:table]
|
40
44
|
end
|
41
45
|
|
42
46
|
# Get a String identifier for the source
|
43
47
|
def to_s
|
44
|
-
"#{host}/#{
|
48
|
+
"#{host}/#{database}/#{table}"
|
45
49
|
end
|
46
50
|
|
47
51
|
# Get the local directory to use, which is a combination of the
|
48
52
|
# local_base, the db hostname, the db database name and the db table.
|
49
53
|
def local_directory
|
50
|
-
File.join(local_base, host,
|
54
|
+
File.join(local_base, host, database, configuration[:table])
|
51
55
|
end
|
52
56
|
|
53
57
|
# Get the join part of the query, defaults to nil
|
@@ -202,43 +206,16 @@ module ETL #:nodoc:
|
|
202
206
|
|
203
207
|
# Get the database connection to use
|
204
208
|
def connection
|
205
|
-
ETL::
|
206
|
-
end
|
207
|
-
|
208
|
-
# Get the adapter name, defaults to :mysql
|
209
|
-
def adapter
|
210
|
-
configuration[:adapter] || :mysql
|
209
|
+
ETL::Engine.connection(target)
|
211
210
|
end
|
212
211
|
|
213
212
|
# Get the host, defaults to 'localhost'
|
214
213
|
def host
|
215
|
-
|
216
|
-
end
|
217
|
-
|
218
|
-
# Get the username, defaults to 'root'
|
219
|
-
def username
|
220
|
-
configuration[:username] || 'root'
|
214
|
+
ETL::Base.configurations[target.to_s]['host'] || 'localhost'
|
221
215
|
end
|
222
216
|
|
223
|
-
|
224
|
-
|
225
|
-
# Required options:
|
226
|
-
# * <tt>:database</tt>: The database name
|
227
|
-
#
|
228
|
-
# Options:
|
229
|
-
# * <tt>:adapter</tt>: The adapter to use (defaults to :mysql)
|
230
|
-
# * <tt>:username</tt>: The database username (defaults to 'root')
|
231
|
-
# * <tt>:password</tt>: The password to the database (defaults
|
232
|
-
# to nothing)
|
233
|
-
# * <tt>:host<tt>: The host for the database (defaults to 'localhost')
|
234
|
-
def connect
|
235
|
-
ETL::Source.establish_connection(
|
236
|
-
:adapter => adapter,
|
237
|
-
:username => username,
|
238
|
-
:host => host,
|
239
|
-
:password => configuration[:password],
|
240
|
-
:database => configuration[:database]
|
241
|
-
)
|
217
|
+
# Get the database name from the configuration entry registered under the
# target connection name.
def database
  target_settings = ETL::Base.configurations[target.to_s]
  target_settings['database']
end
|
243
220
|
end
|
244
221
|
end
|
@@ -40,13 +40,16 @@ module ETL #:nodoc:
|
|
40
40
|
|
41
41
|
# Returns each row from the source
|
42
42
|
def each
|
43
|
+
count = 0
|
43
44
|
copy_sources if store_locally
|
44
45
|
@parser.each do |row|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
46
|
+
if ETL::Engine.offset && count < ETL::Engine.offset
|
47
|
+
count += 1
|
48
|
+
else
|
49
|
+
row = ETL::Row[row]
|
50
|
+
row.source = self
|
51
|
+
yield row
|
52
|
+
end
|
50
53
|
end
|
51
54
|
end
|
52
55
|
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#RAILS_ENV = 'development'
|
2
|
+
#require '../config/environment'
|
3
|
+
|
4
|
+
module ETL #:nodoc:
  module Control #:nodoc:
    # Source which reads its rows through the model named by the
    # <tt>:model</tt> configuration option.
    class ModelSource < Source

      # Names of the columns to extract, as symbols: the elements of the
      # definition when it is an Array, its keys when it is a Hash. Raises
      # when the definition is neither.
      def columns
        source_definition = definition
        if source_definition.is_a?(Array)
          source_definition.map { |name| name.to_sym }
        elsif source_definition.is_a?(Hash)
          source_definition.keys.map { |name| name.to_sym }
        else
          raise "Definition must be either an Array or a Hash"
        end
      end

      # The value of the <tt>:model</tt> configuration option
      def railsmodel
        configuration[:model]
      end

      # The order to use when loading records, defaults to "id"
      def order
        configured_order = configuration[:order]
        configured_order ? configured_order : "id"
      end

      # Yields one ETL::Row per record found through the model. Each row is
      # tagged with this source and holds one entry per configured column,
      # read by sending the column name to the record.
      def each(&block)
        model_class = railsmodel.to_s.camelize.constantize
        records = model_class.find(:all, :order => order)
        records.each do |record|
          extracted = ETL::Row.new
          extracted.source = self
          columns.each { |column| extracted[column.to_sym] = record.send(column) }
          yield extracted
        end
      end
    end
  end
end
|
data/lib/etl/core_ext.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'etl/core_ext/time'
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module ETL #:nodoc:
  module CoreExtensions #:nodoc:
    module Time #:nodoc:
      # Enables the use of time calculations within Time itself
      #
      # The including object must respond to +year+, +month+ and +yday+.
      # Every fiscal method accepts +offset_month+, the calendar month (1-12)
      # in which the fiscal year starts; it defaults to 10 (October).
      module Calculations
        # Week of the calendar year, counted in 7-day blocks starting on
        # January 1st. The short 53rd block is reported as week 52.
        def week
          cyw = ((yday - 1) / 7) + 1
          cyw = 52 if cyw == 53
          cyw
        end

        # Quarter of the calendar year (1-4)
        def quarter
          ((month - 1) / 3) + 1
        end

        # Week of the fiscal year, counted in 7-day blocks starting on the
        # first day of the fiscal year. A 53rd block is reported as week 52.
        def fiscal_year_week(offset_month=10)
          fyw = ((fiscal_year_yday(offset_month) - 1) / 7) + 1
          fyw = 52 if fyw == 53
          fyw
        end

        # Month of the fiscal year (1-12), where 1 is +offset_month+.
        def fiscal_year_month(offset_month=10)
          # Modular arithmetic keeps the result in 1..12; the month just
          # before the fiscal start must map to 12, not 0.
          ((month - offset_month) % 12) + 1
        end

        # Quarter of the fiscal year (1-4)
        def fiscal_year_quarter(offset_month=10)
          ((fiscal_year_month(offset_month) - 1) / 3) + 1
        end

        # The fiscal year: the next calendar year once +offset_month+ has been
        # reached, the current calendar year before that.
        def fiscal_year(offset_month=10)
          month >= offset_month ? year + 1 : year
        end

        # Day of the fiscal year, where 1 is the first day of +offset_month+.
        def fiscal_year_yday(offset_month=10)
          if month >= offset_month
            # The fiscal year started in this calendar year.
            yday - ::Time.utc(year, offset_month, 1).yday + 1
          else
            # The fiscal year started in the previous calendar year: count the
            # days from its start to that year's end, then add the days
            # elapsed in this year. Using real calendar dates (rather than a
            # fixed 365) keeps leap years correct.
            previous_year = year - 1
            fiscal_start_yday = ::Time.utc(previous_year, offset_month, 1).yday
            days_in_previous_year = ::Time.utc(previous_year, 12, 31).yday
            (days_in_previous_year - fiscal_start_yday + 1) + yday
          end
        end
      end
    end
  end
end
|
data/lib/etl/engine.rb
CHANGED
@@ -1,40 +1,58 @@
|
|
1
1
|
module ETL #:nodoc:
|
2
|
-
|
3
|
-
|
4
|
-
# since AR connections are tied to the class, and using ActiveRecord::Base
|
5
|
-
# directly can cause problems if the connection is closed.
|
6
|
-
class Base < ::ActiveRecord::Base
|
7
|
-
end
|
2
|
+
|
3
|
+
class Base < ActiveRecord::Base
|
8
4
|
end
|
9
5
|
|
10
6
|
# The main ETL engine class
|
11
7
|
class Engine
|
8
|
+
include ETL::Util
|
9
|
+
|
12
10
|
class << self
|
13
11
|
# Initialization that is run when a job is executed.
|
12
|
+
#
|
13
|
+
# Options:
|
14
|
+
# * <tt>:limit</tt>: Limit the number of records returned from sources
|
15
|
+
# * <tt>:offset</tt>: Number of leading records to skip when reading from sources
|
16
|
+
# * <tt>:newlog</tt>: If true then the log file will be overwritten, otherwise it will be appended to
|
17
|
+
# * <tt>:skip_bulk_import</tt>: Set to true to skip bulk import
|
18
|
+
# * <tt>:read_locally</tt>: Set to true to read from the local cache
|
19
|
+
# * <tt>:rails_root</tt>: Set to the rails root to boot rails
|
14
20
|
def init(options={})
|
15
21
|
unless @initialized
|
22
|
+
puts "initializing ETL engine"
|
16
23
|
@limit = options[:limit]
|
17
24
|
@offset = options[:offset]
|
18
25
|
@log_write_mode = 'w' if options[:newlog]
|
19
26
|
@skip_bulk_import = options[:skip_bulk_import]
|
20
27
|
@read_locally = options[:read_locally]
|
28
|
+
@rails_root = options[:rails_root]
|
29
|
+
|
30
|
+
require File.join(@rails_root, 'config/environment') if @rails_root
|
31
|
+
|
21
32
|
options[:config] ||= 'database.yml'
|
22
33
|
database_configuration = YAML::load(ERB.new(IO.read(options[:config])).result + "\n")
|
23
|
-
|
24
|
-
|
34
|
+
ActiveRecord::Base.configurations.merge!(database_configuration)
|
35
|
+
ETL::Base.configurations = database_configuration
|
36
|
+
#puts "configurations in init: #{ActiveRecord::Base.configurations.inspect}"
|
37
|
+
|
25
38
|
require 'etl/execution'
|
26
39
|
ETL::Execution::Base.establish_connection :etl_execution
|
27
40
|
ETL::Execution::Execution.migrate
|
41
|
+
|
28
42
|
@initialized = true
|
29
43
|
end
|
30
44
|
end
|
31
45
|
|
32
|
-
# Process the specified
|
46
|
+
# Process the specified file. Acceptable values for file are:
|
33
47
|
# * Path to a file
|
34
48
|
# * File object
|
35
49
|
# * ETL::Control::Control instance
|
36
|
-
|
37
|
-
|
50
|
+
# * ETL::Batch::Batch instance
|
51
|
+
#
|
52
|
+
# The process command will accept either a .ctl Control file or a .ebf
|
53
|
+
# ETL Batch File.
|
54
|
+
# Builds a fresh engine instance and hands the given file to its
# instance-level process method, returning that call's result.
def process(file)
  engine = new
  engine.process(file)
end
|
39
57
|
|
40
58
|
attr_accessor :timestamped_log
|
@@ -55,7 +73,7 @@ module ETL #:nodoc:
|
|
55
73
|
else
|
56
74
|
@logger = Logger.new(File.open('etl.log', log_write_mode))
|
57
75
|
end
|
58
|
-
@logger.level = Logger::
|
76
|
+
@logger.level = Logger::WARN
|
59
77
|
@logger.formatter = Logger::Formatter.new
|
60
78
|
end
|
61
79
|
@logger
|
@@ -94,6 +112,9 @@ module ETL #:nodoc:
|
|
94
112
|
# Access the current ETL::Execution::Job instance
|
95
113
|
attr_accessor :job
|
96
114
|
|
115
|
+
# Access the current ETL::Execution::Batch instance
|
116
|
+
attr_accessor :batch
|
117
|
+
|
97
118
|
# The limit on rows to load from the source, useful for testing the ETL
|
98
119
|
# process prior to executing the entire batch. Default value is nil and
|
99
120
|
# indicates that there is no limit
|
@@ -112,7 +133,80 @@ module ETL #:nodoc:
|
|
112
133
|
|
113
134
|
# Accessor for the average rows per second processed
|
114
135
|
attr_accessor :average_rows_per_second
|
115
|
-
|
136
|
+
|
137
|
+
# Get a named connection
|
138
|
+
# Look up the connection registered under +name+, establishing and caching
# it on first use. A connection that reports itself inactive is reconnected
# before being returned.
def connection(name)
  logger.debug "Retrieving connection #{name}"
  connections[name] = establish_connection(name) unless connections[name]
  named_connection = connections[name]
  #named_connection.verify!(ActiveRecord::Base.verification_timeout)
  named_connection.active? || named_connection.reconnect!
  named_connection
end
|
145
|
+
|
146
|
+
# Set to true to use temp tables
|
147
|
+
attr_accessor :use_temp_tables
|
148
|
+
|
149
|
+
# Get a registry of temp tables
|
150
|
+
# Lazily created Hash that serves as the temp table registry; the same
# Hash is returned on every call.
def temp_tables
  @temp_tables = {} unless @temp_tables
  @temp_tables
end
|
153
|
+
|
154
|
+
# Called when a batch job finishes, allowing for cleanup to occur
|
155
|
+
# Swap every registered temp table into place. For each registry entry,
# inside a transaction on the entry's connection: the actual table is
# renamed to "<table>_old", the temp table is renamed to the actual name,
# and the "_old" table is dropped.
def finish
  temp_tables.each_pair do |temp_name, details|
    live_name = details[:table]
    retired_name = "#{live_name}_old"
    db = details[:connection]
    db.transaction do
      db.rename_table(live_name, retired_name)
      db.rename_table(temp_name, live_name)
      db.drop_table(retired_name)
    end
  end
end
|
167
|
+
|
168
|
+
# Return true if using temp tables
|
169
|
+
# Strict boolean form of the use_temp_tables setting: true when it is
# truthy, false when it is nil or false.
def use_temp_tables?
  !!use_temp_tables
end
|
172
|
+
|
173
|
+
# Modify the table name if necessary
|
174
|
+
# Resolve the table name to work against. Without temp tables the given
# name is returned unchanged. With temp tables the name "tmp_<table_name>"
# is returned; the first time a given temp name is requested the temp table
# is (re)created from the actual table and recorded in the registry together
# with the connection it was created on.
def table(table_name, connection)
  return table_name unless use_temp_tables?

  temp_table_name = "tmp_#{table_name}"
  if temp_tables[temp_table_name].nil?
    # Best effort: any error raised while dropping a previous temp table
    # is ignored.
    begin
      connection.drop_table(temp_table_name)
    rescue
    end
    copy_statement = connection.add_select_into_table(temp_table_name, "SELECT * FROM #{table_name}")
    connection.execute(copy_statement)
    temp_tables[temp_table_name] = { :table => table_name, :connection => connection }
  end
  temp_table_name
end
|
193
|
+
|
194
|
+
protected
|
195
|
+
# Hash of database connections that can be used throughout the ETL
|
196
|
+
# process
|
197
|
+
# Lazily created Hash holding the connections shared across the ETL
# process; the same Hash is returned on every call.
def connections
  @connections = {} unless @connections
  @connections
end
|
200
|
+
|
201
|
+
# Establish the named connection and return the database specific connection
|
202
|
+
# Open the connection configured under +name+. The configuration entry is
# looked up by the stringified name; ETL::ETLError is raised when there is
# none. The entry's 'adapter' value selects which "<adapter>_connection"
# method is sent to ETL::Base, and that call's result is returned.
def establish_connection(name)
  logger.debug "Establishing connection to #{name}"
  settings = ETL::Base.configurations[name.to_s]
  if !settings
    raise ETL::ETLError, "No connection found for #{name}"
  end
  adapter_method = "#{settings['adapter']}_connection"
  ETL::Base.send(adapter_method, settings)
end
|
209
|
+
end # class << self
|
116
210
|
|
117
211
|
# Say the specified message, with a newline
|
118
212
|
def say(message)
|
@@ -121,7 +215,7 @@ module ETL #:nodoc:
|
|
121
215
|
|
122
216
|
# Say the specified message without a newline
|
123
217
|
def say_without_newline(message)
|
124
|
-
if Engine.realtime_activity
|
218
|
+
if ETL::Engine.realtime_activity
|
125
219
|
$stdout.print message
|
126
220
|
$stdout.flush
|
127
221
|
end
|
@@ -153,11 +247,48 @@ module ETL #:nodoc:
|
|
153
247
|
}
|
154
248
|
end
|
155
249
|
|
156
|
-
# Process a control
|
250
|
+
# Process a file, control object or batch object. Acceptable values for
|
251
|
+
# file are:
|
157
252
|
# * Path to a file
|
158
253
|
# * File object
|
159
254
|
# * ETL::Control::Control instance
|
160
|
-
|
255
|
+
# * ETL::Batch::Batch instance
|
256
|
+
# Dispatch the given object to control or batch processing.
#
# * String: treated as a path; the file is opened and processed as a File
# * File: dispatched on its extension, .ctl to process_control and .ebf to
#   process_batch
# * ETL::Control::Control instance: passed to process_control
# * ETL::Batch::Batch instance: passed to process_batch
#
# Returns the result of process_control or process_batch. Raises
# RuntimeError for any other kind of object, and for a File whose path ends
# in neither .ctl nor .ebf (such files used to be skipped silently).
def process(file)
  case file
  when String
    process(File.new(file))
  when File
    # The dot is escaped and the pattern anchored at the end of the path so
    # that only a real ".ctl" / ".ebf" extension matches.
    case file.path
    when /\.ctl\z/
      process_control(file)
    when /\.ebf\z/
      process_batch(file)
    else
      raise RuntimeError, "Unsupported file extension for #{file.path}: expected .ctl or .ebf"
    end
  when ETL::Control::Control
    process_control(file)
  when ETL::Batch::Batch
    process_batch(file)
  else
    raise RuntimeError, "Process object must be a String, File, Control instance or Batch instance"
  end
end
|
272
|
+
|
273
|
+
protected
|
274
|
+
# Process the specified batch file
|
275
|
+
# Resolve and execute a batch while tracking it in an execution record.
# The record is created with status 'executing' and stored on
# ETL::Engine.batch before the batch runs; afterwards it is stamped with
# the completion time and a final status that depends on whether any
# errors were collected, then saved.
def process_batch(batch)
  batch = ETL::Batch::Batch.resolve(batch, self)

  ETL::Engine.batch = ETL::Execution::Batch.create!(:batch_file => batch.file, :status => 'executing')

  batch.execute

  # Read the record back only after execution has finished.
  execution_record = ETL::Engine.batch
  execution_record.completed_at = Time.now
  if errors.length > 0
    execution_record.status = 'completed with errors'
  else
    execution_record.status = 'completed'
  end
  execution_record.save!
end
|
289
|
+
|
290
|
+
# Process the specified control file
|
291
|
+
def process_control(control)
|
161
292
|
control = ETL::Control::Control.resolve(control)
|
162
293
|
|
163
294
|
ETL::Engine.job = ETL::Execution::Job.create!(
|
@@ -168,11 +299,7 @@ module ETL #:nodoc:
|
|
168
299
|
execute_dependencies(control)
|
169
300
|
|
170
301
|
start_time = Time.now
|
171
|
-
|
172
|
-
Engine.logger.debug "Pre-processing #{control.file}"
|
173
302
|
pre_process(control)
|
174
|
-
Engine.logger.debug "Pre-processing complete"
|
175
|
-
|
176
303
|
sources = control.sources
|
177
304
|
destinations = control.destinations
|
178
305
|
|
@@ -195,9 +322,7 @@ module ETL #:nodoc:
|
|
195
322
|
Engine.logger.debug "Row #{index}: #{row.inspect}"
|
196
323
|
Engine.rows_read += 1
|
197
324
|
Engine.current_source_row = index + 1
|
198
|
-
if Engine.realtime_activity && index > 0 && index % 1000 == 0
|
199
|
-
say_without_newline "."
|
200
|
-
end
|
325
|
+
say_without_newline "." if Engine.realtime_activity && index > 0 && index % 1000 == 0
|
201
326
|
|
202
327
|
# At this point a single row may be turned into multiple rows via row
|
203
328
|
# processors all code after this line should work with the array of
|
@@ -225,7 +350,6 @@ module ETL #:nodoc:
|
|
225
350
|
|
226
351
|
t = Benchmark.realtime do
|
227
352
|
begin
|
228
|
-
# execute transforms
|
229
353
|
Engine.logger.debug "Executing transforms"
|
230
354
|
rows.each do |row|
|
231
355
|
control.transforms.each do |transform|
|
@@ -253,9 +377,7 @@ module ETL #:nodoc:
|
|
253
377
|
Engine.logger.debug "Processing before write"
|
254
378
|
control.before_write_processors.each do |processor|
|
255
379
|
processed_rows = []
|
256
|
-
rows.each
|
257
|
-
processed_rows << processor.process(row)
|
258
|
-
end
|
380
|
+
rows.each { |row| processed_rows << processor.process(row) }
|
259
381
|
rows = processed_rows.flatten.compact
|
260
382
|
end
|
261
383
|
rescue => e
|
@@ -300,11 +422,20 @@ module ETL #:nodoc:
|
|
300
422
|
destination.close
|
301
423
|
end
|
302
424
|
|
303
|
-
say_on_own_line "Executing
|
304
|
-
|
425
|
+
say_on_own_line "Executing screens"
|
426
|
+
begin
|
427
|
+
execute_screens(control)
|
428
|
+
rescue FatalScreenError => e
|
429
|
+
say "Fatal screen error during job execution: #{e.message}"
|
430
|
+
exit
|
431
|
+
rescue ScreenError => e
|
432
|
+
say "Screen error during job execution: #{e.message}"
|
433
|
+
return
|
434
|
+
else
|
435
|
+
say "Screens passed"
|
436
|
+
end
|
437
|
+
|
305
438
|
post_process(control)
|
306
|
-
Engine.logger.debug "Post-processing complete"
|
307
|
-
say "Post-processing complete"
|
308
439
|
|
309
440
|
if sources.length > 0
|
310
441
|
say_on_own_line "Read #{Engine.rows_read} lines from sources"
|
@@ -339,16 +470,22 @@ module ETL #:nodoc:
|
|
339
470
|
|
340
471
|
# Execute all preprocessors
|
341
472
|
# Runs each of the control's pre-processors in order, logging the control
# file name before and a completion message after at debug level.
def pre_process(control)
  Engine.logger.debug "Pre-processing #{control.file}"
  control.pre_processors.each { |pre_processor| pre_processor.process }
  Engine.logger.debug "Pre-processing complete"
end
|
346
479
|
|
347
480
|
# Execute all postprocessors
|
348
481
|
# Announces the post-processing phase, runs each of the control's
# post-processors in order, then reports completion both to the debug log
# and through say.
def post_process(control)
  say_on_own_line "Executing post processes"
  Engine.logger.debug "Post-processing #{control.file}"
  control.post_processors.each { |post_processor| post_processor.process }
  Engine.logger.debug "Post-processing complete"
  say "Post-processing complete"
end
|
353
490
|
|
354
491
|
# Execute all dependencies
|
@@ -371,59 +508,23 @@ module ETL #:nodoc:
|
|
371
508
|
end
|
372
509
|
end
|
373
510
|
|
374
|
-
#
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
s << "#{distance_in_days} days," if distance_in_days > 0
|
391
|
-
s << "#{distance_in_hours} hours, " if distance_in_hours > 0
|
392
|
-
s << "#{distance_in_minutes} minutes, " if distance_in_minutes > 0
|
393
|
-
s << "#{distance_in_seconds} seconds"
|
394
|
-
s
|
395
|
-
end
|
396
|
-
|
397
|
-
# Get the approximate disntance of time in words from the given from_time
|
398
|
-
# to the the given to_time. If to_time is not specified then it is set
|
399
|
-
# to Time.now. By default seconds are included...set the include_seconds
|
400
|
-
# argument to false to disable the seconds.
|
401
|
-
def approximate_distance_of_time_in_words(from_time, to_time=Time.now, include_seconds=true)
|
402
|
-
from_time = from_time.to_time if from_time.respond_to?(:to_time)
|
403
|
-
to_time = to_time.to_time if to_time.respond_to?(:to_time)
|
404
|
-
distance_in_minutes = (((to_time - from_time).abs)/60).round
|
405
|
-
distance_in_seconds = ((to_time - from_time).abs).round
|
406
|
-
|
407
|
-
case distance_in_minutes
|
408
|
-
when 0..1
|
409
|
-
return (distance_in_minutes == 0) ? 'less than a minute' : '1 minute' unless include_seconds
|
410
|
-
case distance_in_seconds
|
411
|
-
when 0..4 then 'less than 5 seconds'
|
412
|
-
when 5..9 then 'less than 10 seconds'
|
413
|
-
when 10..19 then 'less than 20 seconds'
|
414
|
-
when 20..39 then 'half a minute'
|
415
|
-
when 40..59 then 'less than a minute'
|
416
|
-
else '1 minute'
|
511
|
+
# Execute all screens
|
512
|
+
def execute_screens(control)
|
513
|
+
[:fatal,:error,:warn].each do |type|
|
514
|
+
control.screens[type].each do |block|
|
515
|
+
begin
|
516
|
+
block.call
|
517
|
+
rescue => e
|
518
|
+
case type
|
519
|
+
when :fatal
|
520
|
+
raise FatalScreenError, e
|
521
|
+
when :error
|
522
|
+
raise ScreenError, e
|
523
|
+
when :warn
|
524
|
+
say "Screen warning: #{e}"
|
525
|
+
end
|
526
|
+
end
|
417
527
|
end
|
418
|
-
when 2..44 then "#{distance_in_minutes} minutes"
|
419
|
-
when 45..89 then 'about 1 hour'
|
420
|
-
when 90..1439 then "about #{(distance_in_minutes.to_f / 60.0).round} hours"
|
421
|
-
when 1440..2879 then '1 day'
|
422
|
-
when 2880..43199 then "#{(distance_in_minutes / 1440).round} days"
|
423
|
-
when 43200..86399 then 'about 1 month'
|
424
|
-
when 86400..525959 then "#{(distance_in_minutes / 43200).round} months"
|
425
|
-
when 525960..1051919 then 'about 1 year'
|
426
|
-
else "over #{(distance_in_minutes / 525960).round} years"
|
427
528
|
end
|
428
529
|
end
|
429
530
|
end
|