activewarehouse-etl 0.8.4 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +98 -62
- data/Rakefile +11 -0
- data/TODO +2 -1
- data/lib/etl.rb +9 -0
- data/lib/etl/batch.rb +2 -0
- data/lib/etl/batch/batch.rb +111 -0
- data/lib/etl/batch/directives.rb +55 -0
- data/lib/etl/builder.rb +1 -0
- data/lib/etl/builder/date_dimension_builder.rb +83 -0
- data/lib/etl/commands/etl.rb +56 -43
- data/lib/etl/control/control.rb +58 -9
- data/lib/etl/control/destination.rb +29 -4
- data/lib/etl/control/destination/database_destination.rb +17 -27
- data/lib/etl/control/source/database_source.rb +17 -40
- data/lib/etl/control/source/file_source.rb +8 -5
- data/lib/etl/control/source/model_source.rb +39 -0
- data/lib/etl/core_ext.rb +1 -0
- data/lib/etl/core_ext/time.rb +5 -0
- data/lib/etl/core_ext/time/calculations.rb +40 -0
- data/lib/etl/engine.rb +184 -83
- data/lib/etl/execution.rb +1 -0
- data/lib/etl/execution/base.rb +1 -1
- data/lib/etl/execution/batch.rb +8 -0
- data/lib/etl/execution/job.rb +1 -0
- data/lib/etl/execution/migration.rb +16 -4
- data/lib/etl/generator/surrogate_key_generator.rb +20 -4
- data/lib/etl/http_tools.rb +1 -1
- data/lib/etl/processor/bulk_import_processor.rb +16 -19
- data/lib/etl/processor/check_exist_processor.rb +16 -7
- data/lib/etl/processor/hierarchy_exploder_processor.rb +2 -1
- data/lib/etl/processor/require_non_blank_processor.rb +26 -0
- data/lib/etl/processor/surrogate_key_processor.rb +22 -2
- data/lib/etl/processor/truncate_processor.rb +13 -13
- data/lib/etl/screen.rb +14 -0
- data/lib/etl/screen/row_count_screen.rb +7 -2
- data/lib/etl/transform/foreign_key_lookup_transform.rb +15 -5
- data/lib/etl/transform/hierarchy_lookup_transform.rb +7 -14
- data/lib/etl/util.rb +59 -0
- data/lib/etl/version.rb +2 -2
- metadata +19 -2
@@ -8,6 +8,9 @@ module ETL #:nodoc:
|
|
8
8
|
module Control #:nodoc:
|
9
9
|
# Source object which extracts data from a database using ActiveRecord.
|
10
10
|
class DatabaseSource < Source
|
11
|
+
attr_accessor :target
|
12
|
+
attr_accessor :table
|
13
|
+
|
11
14
|
# Initialize the source.
|
12
15
|
#
|
13
16
|
# Arguments:
|
@@ -16,38 +19,39 @@ module ETL #:nodoc:
|
|
16
19
|
# * <tt>definition</tt>: The source definition
|
17
20
|
#
|
18
21
|
# Required configuration options:
|
22
|
+
# * <tt>:target</tt>: The target connection
|
19
23
|
# * <tt>:table</tt>: The source table name
|
20
24
|
# * <tt>:database</tt>: The database name
|
21
25
|
#
|
22
26
|
# Other options:
|
23
|
-
# * <tt>:adapter</tt>: The adapter to use (defaults to :mysql)
|
24
|
-
# * <tt>:username</tt>: The database username (defaults to 'root')
|
25
|
-
# * <tt>:password</tt>: The password to the database (defaults to
|
26
|
-
# nothing)
|
27
|
-
# * <tt>:host</tt>: The host for the database (defaults to
|
28
|
-
# 'localhost')
|
29
27
|
# * <tt>:join</tt>: Optional join part for the query (ignored unless
|
30
28
|
# specified)
|
31
29
|
# * <tt>:select</tt>: Optional select part for the query (defaults to
|
32
30
|
# '*')
|
31
|
+
# * <tt>:group</tt>: Optional group by part for the query (ignored
|
32
|
+
# unless specified)
|
33
33
|
# * <tt>:order</tt>: Optional order part for the query (ignored unless
|
34
34
|
# specified)
|
35
|
+
# * <tt>:new_records_only</tt>: Specify the column to use when comparing
|
36
|
+
# timestamps against the last successful ETL job execution for the
|
37
|
+
# current control file.
|
35
38
|
# * <tt>:store_locally</tt>: Set to false to not store a copy of the
|
36
39
|
# source data locally in a flat file (defaults to true)
|
37
40
|
def initialize(control, configuration, definition)
|
38
41
|
super
|
39
|
-
|
42
|
+
@target = configuration[:target]
|
43
|
+
@table = configuration[:table]
|
40
44
|
end
|
41
45
|
|
42
46
|
# Get a String identifier for the source
|
43
47
|
def to_s
|
44
|
-
"#{host}/#{
|
48
|
+
"#{host}/#{database}/#{table}"
|
45
49
|
end
|
46
50
|
|
47
51
|
# Get the local directory to use, which is a combination of the
|
48
52
|
# local_base, the db hostname, the db database name and the db table.
|
49
53
|
def local_directory
|
50
|
-
File.join(local_base, host,
|
54
|
+
File.join(local_base, host, database, configuration[:table])
|
51
55
|
end
|
52
56
|
|
53
57
|
# Get the join part of the query, defaults to nil
|
@@ -202,43 +206,16 @@ module ETL #:nodoc:
|
|
202
206
|
|
203
207
|
# Get the database connection to use
|
204
208
|
def connection
|
205
|
-
ETL::
|
206
|
-
end
|
207
|
-
|
208
|
-
# Get the adapter name, defaults to :mysql
|
209
|
-
def adapter
|
210
|
-
configuration[:adapter] || :mysql
|
209
|
+
ETL::Engine.connection(target)
|
211
210
|
end
|
212
211
|
|
213
212
|
# Get the host, defaults to 'localhost'
|
214
213
|
def host
|
215
|
-
|
216
|
-
end
|
217
|
-
|
218
|
-
# Get the username, defaults to 'root'
|
219
|
-
def username
|
220
|
-
configuration[:username] || 'root'
|
214
|
+
ETL::Base.configurations[target.to_s]['host'] || 'localhost'
|
221
215
|
end
|
222
216
|
|
223
|
-
|
224
|
-
|
225
|
-
# Required options:
|
226
|
-
# * <tt>:database</tt>: The database name
|
227
|
-
#
|
228
|
-
# Options:
|
229
|
-
# * <tt>:adapter</tt>: The adapter to use (defaults to :mysql)
|
230
|
-
# * <tt>:username</tt>: The database username (defaults to 'root')
|
231
|
-
# * <tt>:password</tt>: The password to the database (defaults
|
232
|
-
# to nothing)
|
233
|
-
# * <tt>:host<tt>: The host for the database (defaults to 'localhost')
|
234
|
-
def connect
|
235
|
-
ETL::Source.establish_connection(
|
236
|
-
:adapter => adapter,
|
237
|
-
:username => username,
|
238
|
-
:host => host,
|
239
|
-
:password => configuration[:password],
|
240
|
-
:database => configuration[:database]
|
241
|
-
)
|
217
|
+
def database
|
218
|
+
ETL::Base.configurations[target.to_s]['database']
|
242
219
|
end
|
243
220
|
end
|
244
221
|
end
|
@@ -40,13 +40,16 @@ module ETL #:nodoc:
|
|
40
40
|
|
41
41
|
# Returns each row from the source
|
42
42
|
def each
|
43
|
+
count = 0
|
43
44
|
copy_sources if store_locally
|
44
45
|
@parser.each do |row|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
46
|
+
if ETL::Engine.offset && count < ETL::Engine.offset
|
47
|
+
count += 1
|
48
|
+
else
|
49
|
+
row = ETL::Row[row]
|
50
|
+
row.source = self
|
51
|
+
yield row
|
52
|
+
end
|
50
53
|
end
|
51
54
|
end
|
52
55
|
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#RAILS_ENV = 'development'
|
2
|
+
#require '../config/environment'
|
3
|
+
|
4
|
+
module ETL #:nodoc:
  module Control #:nodoc:
    # Source that yields one ETL::Row per record of a model class. The model
    # is named by the <tt>:model</tt> configuration option and the records
    # are fetched in the order given by <tt>:order</tt> (defaults to "id").
    class ModelSource < Source

      # The column names to extract, as symbols. Taken from the elements of
      # the definition when it is an Array, or from its keys when it is a
      # Hash; any other definition type raises.
      def columns
        names =
          if definition.is_a?(Array)
            definition
          elsif definition.is_a?(Hash)
            definition.keys
          else
            raise "Definition must be either an Array or a Hash"
          end
        names.collect { |name| name.to_sym }
      end

      # The model identifier as supplied in the configuration
      def railsmodel
        configuration[:model]
      end

      # The ordering passed to the finder, "id" unless configured
      def order
        configuration[:order] || "id"
      end

      # Yield an ETL::Row for every record returned by the model's finder.
      # Each row is tagged with this source and populated by sending every
      # column name to the record.
      def each(&block)
        model_class = railsmodel.to_s.camelize.constantize
        records = model_class.find(:all, :order => order)
        records.each do |record|
          result_row = ETL::Row.new
          result_row.source = self
          columns.each { |column| result_row[column.to_sym] = record.send(column) }
          yield result_row
        end
      end
    end
  end
end
|
data/lib/etl/core_ext.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'etl/core_ext/time'
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module ETL #:nodoc:
  module CoreExtensions #:nodoc:
    module Time #:nodoc:
      # Enables the use of time calculations within Time itself.
      #
      # Every fiscal method takes +offset_month+, the calendar month (1-12)
      # in which the fiscal year starts; it defaults to 10 (October).
      # The methods rely on the receiver's +year+, +month+ and +yday+.
      module Calculations
        # Week of the calendar year, counted in 7-day blocks starting on
        # January 1st. A 53rd block is folded into week 52, so the result
        # is always in 1..52.
        def week
          cyw = ((yday - 1) / 7) + 1
          cyw = 52 if cyw == 53
          cyw
        end

        # Calendar quarter (1..4)
        def quarter
          ((month - 1) / 3) + 1
        end

        # Week of the fiscal year, counted in 7-day blocks starting on the
        # first day of the fiscal year. A 53rd block is folded into week 52.
        def fiscal_year_week(offset_month=10)
          fyw = ((fiscal_year_yday(offset_month) - 1) / 7) + 1
          fyw = 52 if fyw == 53
          fyw
        end

        # Month of the fiscal year (1..12), where +offset_month+ is fiscal
        # month 1.
        def fiscal_year_month(offset_month=10)
          shifted_month = month - (offset_month - 1)
          # <= 0 (not < 0): the month immediately before the fiscal start
          # shifts to exactly 0 and must wrap to 12.
          shifted_month += 12 if shifted_month <= 0
          shifted_month
        end

        # Quarter of the fiscal year (1..4)
        def fiscal_year_quarter(offset_month=10)
          ((fiscal_year_month(offset_month) - 1) / 3) + 1
        end

        # The fiscal year: the calendar year plus one when the month is at or
        # past +offset_month+, otherwise the calendar year.
        def fiscal_year(offset_month=10)
          month >= offset_month ? year + 1 : year
        end

        # Day of the fiscal year, where the first day of +offset_month+ is
        # day 1.
        def fiscal_year_yday(offset_month=10)
          # Days of the current calendar year that precede the fiscal start
          offset_days = 0
          1.upto(offset_month - 1) { |m| offset_days += ::Time.days_in_month(m, year) }
          shifted_year_day = yday - offset_days
          return shifted_year_day if shifted_year_day > 0

          # The date falls before this calendar year's fiscal start, so the
          # fiscal year began in the previous calendar year. Count the days
          # of that year's tail explicitly instead of assuming a 365-day
          # wrap, which is off by one whenever a leap day is involved.
          previous_year_tail = 0
          offset_month.upto(12) { |m| previous_year_tail += ::Time.days_in_month(m, year - 1) }
          yday + previous_year_tail
        end
      end
    end
  end
end
|
data/lib/etl/engine.rb
CHANGED
@@ -1,40 +1,58 @@
|
|
1
1
|
module ETL #:nodoc:
|
2
|
-
|
3
|
-
|
4
|
-
# since AR connections are tied to the class, and using ActiveRecord::Base
|
5
|
-
# directly can cause problems if the connection is closed.
|
6
|
-
class Base < ::ActiveRecord::Base
|
7
|
-
end
|
2
|
+
|
3
|
+
class Base < ActiveRecord::Base
|
8
4
|
end
|
9
5
|
|
10
6
|
# The main ETL engine class
|
11
7
|
class Engine
|
8
|
+
include ETL::Util
|
9
|
+
|
12
10
|
class << self
|
13
11
|
# Initialization that is run when a job is executed.
|
12
|
+
#
|
13
|
+
# Options:
|
14
|
+
# * <tt>:limit</tt>: Limit the number of records returned from sources
|
15
|
+
# * <tt>:offset</tt>: Number of leading records to skip when reading from sources
|
16
|
+
# * <tt>:newlog</tt>: If true then the log file will be overwritten, otherwise it will be appended to
|
17
|
+
# * <tt>:skip_bulk_import</tt>: Set to true to skip bulk import
|
18
|
+
# * <tt>:read_locally</tt>: Set to true to read from the local cache
|
19
|
+
# * <tt>:rails_root</tt>: Set to the rails root to boot rails
|
14
20
|
def init(options={})
|
15
21
|
unless @initialized
|
22
|
+
puts "initializing ETL engine"
|
16
23
|
@limit = options[:limit]
|
17
24
|
@offset = options[:offset]
|
18
25
|
@log_write_mode = 'w' if options[:newlog]
|
19
26
|
@skip_bulk_import = options[:skip_bulk_import]
|
20
27
|
@read_locally = options[:read_locally]
|
28
|
+
@rails_root = options[:rails_root]
|
29
|
+
|
30
|
+
require File.join(@rails_root, 'config/environment') if @rails_root
|
31
|
+
|
21
32
|
options[:config] ||= 'database.yml'
|
22
33
|
database_configuration = YAML::load(ERB.new(IO.read(options[:config])).result + "\n")
|
23
|
-
|
24
|
-
|
34
|
+
ActiveRecord::Base.configurations.merge!(database_configuration)
|
35
|
+
ETL::Base.configurations = database_configuration
|
36
|
+
#puts "configurations in init: #{ActiveRecord::Base.configurations.inspect}"
|
37
|
+
|
25
38
|
require 'etl/execution'
|
26
39
|
ETL::Execution::Base.establish_connection :etl_execution
|
27
40
|
ETL::Execution::Execution.migrate
|
41
|
+
|
28
42
|
@initialized = true
|
29
43
|
end
|
30
44
|
end
|
31
45
|
|
32
|
-
# Process the specified
|
46
|
+
# Process the specified file. Acceptable values for file are:
|
33
47
|
# * Path to a file
|
34
48
|
# * File object
|
35
49
|
# * ETL::Control::Control instance
|
36
|
-
|
37
|
-
|
50
|
+
# * ETL::Batch::Batch instance
|
51
|
+
#
|
52
|
+
# The process command will accept either a .ctl Control file or a .ebf
|
53
|
+
# ETL Batch File.
|
54
|
+
def process(file)
|
55
|
+
new().process(file)
|
38
56
|
end
|
39
57
|
|
40
58
|
attr_accessor :timestamped_log
|
@@ -55,7 +73,7 @@ module ETL #:nodoc:
|
|
55
73
|
else
|
56
74
|
@logger = Logger.new(File.open('etl.log', log_write_mode))
|
57
75
|
end
|
58
|
-
@logger.level = Logger::
|
76
|
+
@logger.level = Logger::WARN
|
59
77
|
@logger.formatter = Logger::Formatter.new
|
60
78
|
end
|
61
79
|
@logger
|
@@ -94,6 +112,9 @@ module ETL #:nodoc:
|
|
94
112
|
# Access the current ETL::Execution::Job instance
|
95
113
|
attr_accessor :job
|
96
114
|
|
115
|
+
# Access the current ETL::Execution::Batch instance
|
116
|
+
attr_accessor :batch
|
117
|
+
|
97
118
|
# The limit on rows to load from the source, useful for testing the ETL
|
98
119
|
# process prior to executing the entire batch. Default value is nil and
|
99
120
|
# indicates that there is no limit
|
@@ -112,7 +133,80 @@ module ETL #:nodoc:
|
|
112
133
|
|
113
134
|
# Accessor for the average rows per second processed
|
114
135
|
attr_accessor :average_rows_per_second
|
115
|
-
|
136
|
+
|
137
|
+
# Get a named connection
|
138
|
+
def connection(name)
|
139
|
+
logger.debug "Retrieving connection #{name}"
|
140
|
+
conn = connections[name] ||= establish_connection(name)
|
141
|
+
#conn.verify!(ActiveRecord::Base.verification_timeout)
|
142
|
+
conn.reconnect! unless conn.active?
|
143
|
+
conn
|
144
|
+
end
|
145
|
+
|
146
|
+
# Set to true to use temp tables
|
147
|
+
attr_accessor :use_temp_tables
|
148
|
+
|
149
|
+
# Get a registry of temp tables
|
150
|
+
def temp_tables
|
151
|
+
@temp_tables ||= {}
|
152
|
+
end
|
153
|
+
|
154
|
+
# Called when a batch job finishes, allowing for cleanup to occur
|
155
|
+
def finish
|
156
|
+
temp_tables.each do |temp_table, mapping|
|
157
|
+
actual_table = mapping[:table]
|
158
|
+
#puts "move #{temp_table} to #{actual_table}"
|
159
|
+
conn = mapping[:connection]
|
160
|
+
conn.transaction do
|
161
|
+
conn.rename_table(actual_table, "#{actual_table}_old")
|
162
|
+
conn.rename_table(temp_table, actual_table)
|
163
|
+
conn.drop_table("#{actual_table}_old")
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
# Return true if using temp tables
|
169
|
+
def use_temp_tables?
|
170
|
+
use_temp_tables ? true : false
|
171
|
+
end
|
172
|
+
|
173
|
+
# Modify the table name if necessary
|
174
|
+
def table(table_name, connection)
|
175
|
+
if use_temp_tables?
|
176
|
+
returning "tmp_#{table_name}" do |temp_table_name|
|
177
|
+
if temp_tables[temp_table_name].nil?
|
178
|
+
# Create the temp table and add it to the mapping
|
179
|
+
begin connection.drop_table(temp_table_name); rescue; end
|
180
|
+
connection.execute(
|
181
|
+
connection.add_select_into_table(temp_table_name, "SELECT * FROM #{table_name}")
|
182
|
+
)
|
183
|
+
temp_tables[temp_table_name] = {
|
184
|
+
:table => table_name,
|
185
|
+
:connection => connection
|
186
|
+
}
|
187
|
+
end
|
188
|
+
end
|
189
|
+
else
|
190
|
+
table_name
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
protected
|
195
|
+
# Hash of database connections that can be used throughout the ETL
|
196
|
+
# process
|
197
|
+
def connections
|
198
|
+
@connections ||= {}
|
199
|
+
end
|
200
|
+
|
201
|
+
# Establish the named connection and return the database specific connection
|
202
|
+
def establish_connection(name)
|
203
|
+
logger.debug "Establishing connection to #{name}"
|
204
|
+
conn_config = ETL::Base.configurations[name.to_s]
|
205
|
+
raise ETL::ETLError, "No connection found for #{name}" unless conn_config
|
206
|
+
connection_method = "#{conn_config['adapter']}_connection"
|
207
|
+
ETL::Base.send(connection_method, conn_config)
|
208
|
+
end
|
209
|
+
end # class << self
|
116
210
|
|
117
211
|
# Say the specified message, with a newline
|
118
212
|
def say(message)
|
@@ -121,7 +215,7 @@ module ETL #:nodoc:
|
|
121
215
|
|
122
216
|
# Say the specified message without a newline
|
123
217
|
def say_without_newline(message)
|
124
|
-
if Engine.realtime_activity
|
218
|
+
if ETL::Engine.realtime_activity
|
125
219
|
$stdout.print message
|
126
220
|
$stdout.flush
|
127
221
|
end
|
@@ -153,11 +247,48 @@ module ETL #:nodoc:
|
|
153
247
|
}
|
154
248
|
end
|
155
249
|
|
156
|
-
# Process a control
|
250
|
+
# Process a file, control object or batch object. Acceptable values for
|
251
|
+
# file are:
|
157
252
|
# * Path to a file
|
158
253
|
# * File object
|
159
254
|
# * ETL::Control::Control instance
|
160
|
-
|
255
|
+
# * ETL::Batch::Batch instance
|
256
|
+
# Dispatch +file+ to process_control or process_batch.
#
# * String: treated as a path; the file is opened, processed as a File and
#   closed again once processing returns.
# * File: processed as a control file when its path ends in ".ctl" and as a
#   batch file when it ends in ".ebf". Any other extension is ignored.
# * ETL::Control::Control / ETL::Batch::Batch: processed directly.
#
# Raises RuntimeError for any other kind of object.
def process(file)
  case file
  when String
    # Block form so the handle opened here does not leak
    File.open(file) { |opened| process(opened) }
  when File
    # Escape the dot and anchor at the end of the string so that only a
    # real ".ctl" / ".ebf" extension matches.
    process_control(file) if file.path =~ /\.ctl\z/
    process_batch(file) if file.path =~ /\.ebf\z/
  when ETL::Control::Control
    process_control(file)
  when ETL::Batch::Batch
    process_batch(file)
  else
    raise RuntimeError, "Process object must be a String, File, Control instance or Batch instance"
  end
end
|
272
|
+
|
273
|
+
protected
|
274
|
+
# Process the specified batch file
|
275
|
+
def process_batch(batch)
|
276
|
+
batch = ETL::Batch::Batch.resolve(batch, self)
|
277
|
+
|
278
|
+
ETL::Engine.batch = ETL::Execution::Batch.create!(
|
279
|
+
:batch_file => batch.file,
|
280
|
+
:status => 'executing'
|
281
|
+
)
|
282
|
+
|
283
|
+
batch.execute
|
284
|
+
|
285
|
+
ETL::Engine.batch.completed_at = Time.now
|
286
|
+
ETL::Engine.batch.status = (errors.length > 0 ? 'completed with errors' : 'completed')
|
287
|
+
ETL::Engine.batch.save!
|
288
|
+
end
|
289
|
+
|
290
|
+
# Process the specified control file
|
291
|
+
def process_control(control)
|
161
292
|
control = ETL::Control::Control.resolve(control)
|
162
293
|
|
163
294
|
ETL::Engine.job = ETL::Execution::Job.create!(
|
@@ -168,11 +299,7 @@ module ETL #:nodoc:
|
|
168
299
|
execute_dependencies(control)
|
169
300
|
|
170
301
|
start_time = Time.now
|
171
|
-
|
172
|
-
Engine.logger.debug "Pre-processing #{control.file}"
|
173
302
|
pre_process(control)
|
174
|
-
Engine.logger.debug "Pre-processing complete"
|
175
|
-
|
176
303
|
sources = control.sources
|
177
304
|
destinations = control.destinations
|
178
305
|
|
@@ -195,9 +322,7 @@ module ETL #:nodoc:
|
|
195
322
|
Engine.logger.debug "Row #{index}: #{row.inspect}"
|
196
323
|
Engine.rows_read += 1
|
197
324
|
Engine.current_source_row = index + 1
|
198
|
-
if Engine.realtime_activity && index > 0 && index % 1000 == 0
|
199
|
-
say_without_newline "."
|
200
|
-
end
|
325
|
+
say_without_newline "." if Engine.realtime_activity && index > 0 && index % 1000 == 0
|
201
326
|
|
202
327
|
# At this point a single row may be turned into multiple rows via row
|
203
328
|
# processors all code after this line should work with the array of
|
@@ -225,7 +350,6 @@ module ETL #:nodoc:
|
|
225
350
|
|
226
351
|
t = Benchmark.realtime do
|
227
352
|
begin
|
228
|
-
# execute transforms
|
229
353
|
Engine.logger.debug "Executing transforms"
|
230
354
|
rows.each do |row|
|
231
355
|
control.transforms.each do |transform|
|
@@ -253,9 +377,7 @@ module ETL #:nodoc:
|
|
253
377
|
Engine.logger.debug "Processing before write"
|
254
378
|
control.before_write_processors.each do |processor|
|
255
379
|
processed_rows = []
|
256
|
-
rows.each
|
257
|
-
processed_rows << processor.process(row)
|
258
|
-
end
|
380
|
+
rows.each { |row| processed_rows << processor.process(row) }
|
259
381
|
rows = processed_rows.flatten.compact
|
260
382
|
end
|
261
383
|
rescue => e
|
@@ -300,11 +422,20 @@ module ETL #:nodoc:
|
|
300
422
|
destination.close
|
301
423
|
end
|
302
424
|
|
303
|
-
say_on_own_line "Executing
|
304
|
-
|
425
|
+
say_on_own_line "Executing screens"
|
426
|
+
begin
|
427
|
+
execute_screens(control)
|
428
|
+
rescue FatalScreenError => e
|
429
|
+
say "Fatal screen error during job execution: #{e.message}"
|
430
|
+
exit
|
431
|
+
rescue ScreenError => e
|
432
|
+
say "Screen error during job execution: #{e.message}"
|
433
|
+
return
|
434
|
+
else
|
435
|
+
say "Screens passed"
|
436
|
+
end
|
437
|
+
|
305
438
|
post_process(control)
|
306
|
-
Engine.logger.debug "Post-processing complete"
|
307
|
-
say "Post-processing complete"
|
308
439
|
|
309
440
|
if sources.length > 0
|
310
441
|
say_on_own_line "Read #{Engine.rows_read} lines from sources"
|
@@ -339,16 +470,22 @@ module ETL #:nodoc:
|
|
339
470
|
|
340
471
|
# Execute all preprocessors
|
341
472
|
def pre_process(control)
|
473
|
+
Engine.logger.debug "Pre-processing #{control.file}"
|
342
474
|
control.pre_processors.each do |processor|
|
343
475
|
processor.process
|
344
476
|
end
|
477
|
+
Engine.logger.debug "Pre-processing complete"
|
345
478
|
end
|
346
479
|
|
347
480
|
# Execute all postprocessors
|
348
481
|
def post_process(control)
|
482
|
+
say_on_own_line "Executing post processes"
|
483
|
+
Engine.logger.debug "Post-processing #{control.file}"
|
349
484
|
control.post_processors.each do |processor|
|
350
485
|
processor.process
|
351
486
|
end
|
487
|
+
Engine.logger.debug "Post-processing complete"
|
488
|
+
say "Post-processing complete"
|
352
489
|
end
|
353
490
|
|
354
491
|
# Execute all dependencies
|
@@ -371,59 +508,23 @@ module ETL #:nodoc:
|
|
371
508
|
end
|
372
509
|
end
|
373
510
|
|
374
|
-
#
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
s << "#{distance_in_days} days," if distance_in_days > 0
|
391
|
-
s << "#{distance_in_hours} hours, " if distance_in_hours > 0
|
392
|
-
s << "#{distance_in_minutes} minutes, " if distance_in_minutes > 0
|
393
|
-
s << "#{distance_in_seconds} seconds"
|
394
|
-
s
|
395
|
-
end
|
396
|
-
|
397
|
-
# Get the approximate disntance of time in words from the given from_time
|
398
|
-
# to the the given to_time. If to_time is not specified then it is set
|
399
|
-
# to Time.now. By default seconds are included...set the include_seconds
|
400
|
-
# argument to false to disable the seconds.
|
401
|
-
def approximate_distance_of_time_in_words(from_time, to_time=Time.now, include_seconds=true)
|
402
|
-
from_time = from_time.to_time if from_time.respond_to?(:to_time)
|
403
|
-
to_time = to_time.to_time if to_time.respond_to?(:to_time)
|
404
|
-
distance_in_minutes = (((to_time - from_time).abs)/60).round
|
405
|
-
distance_in_seconds = ((to_time - from_time).abs).round
|
406
|
-
|
407
|
-
case distance_in_minutes
|
408
|
-
when 0..1
|
409
|
-
return (distance_in_minutes == 0) ? 'less than a minute' : '1 minute' unless include_seconds
|
410
|
-
case distance_in_seconds
|
411
|
-
when 0..4 then 'less than 5 seconds'
|
412
|
-
when 5..9 then 'less than 10 seconds'
|
413
|
-
when 10..19 then 'less than 20 seconds'
|
414
|
-
when 20..39 then 'half a minute'
|
415
|
-
when 40..59 then 'less than a minute'
|
416
|
-
else '1 minute'
|
511
|
+
# Execute all screens
|
512
|
+
def execute_screens(control)
|
513
|
+
[:fatal,:error,:warn].each do |type|
|
514
|
+
control.screens[type].each do |block|
|
515
|
+
begin
|
516
|
+
block.call
|
517
|
+
rescue => e
|
518
|
+
case type
|
519
|
+
when :fatal
|
520
|
+
raise FatalScreenError, e
|
521
|
+
when :error
|
522
|
+
raise ScreenError, e
|
523
|
+
when :warn
|
524
|
+
say "Screen warning: #{e}"
|
525
|
+
end
|
526
|
+
end
|
417
527
|
end
|
418
|
-
when 2..44 then "#{distance_in_minutes} minutes"
|
419
|
-
when 45..89 then 'about 1 hour'
|
420
|
-
when 90..1439 then "about #{(distance_in_minutes.to_f / 60.0).round} hours"
|
421
|
-
when 1440..2879 then '1 day'
|
422
|
-
when 2880..43199 then "#{(distance_in_minutes / 1440).round} days"
|
423
|
-
when 43200..86399 then 'about 1 month'
|
424
|
-
when 86400..525959 then "#{(distance_in_minutes / 43200).round} months"
|
425
|
-
when 525960..1051919 then 'about 1 year'
|
426
|
-
else "over #{(distance_in_minutes / 525960).round} years"
|
427
528
|
end
|
428
529
|
end
|
429
530
|
end
|