activewarehouse-etl 0.9.5.rc1 → 1.0.0.rc1
- data/.standalone_migrations +2 -0
- data/.travis.yml +15 -0
- data/CHANGELOG +10 -1
- data/HOW_TO_RELEASE +4 -0
- data/LICENSE +1 -1
- data/README.textile +111 -0
- data/Rakefile +37 -78
- data/activewarehouse-etl.gemspec +7 -4
- data/db/migrate/20120229203554_create_tables.rb +37 -0
- data/db/schema.rb +45 -0
- data/examples/database.example.yml +3 -3
- data/lib/etl.rb +16 -0
- data/lib/etl/commands/etl.rb +1 -0
- data/lib/etl/control/control.rb +1 -1
- data/lib/etl/control/destination.rb +5 -16
- data/lib/etl/control/destination/csv_destination.rb +122 -0
- data/lib/etl/control/destination/excel_destination.rb +1 -1
- data/lib/etl/control/destination/insert_update_database_destination.rb +6 -3
- data/lib/etl/control/destination/yaml_destination.rb +74 -0
- data/lib/etl/control/source.rb +39 -4
- data/lib/etl/control/source/database_source.rb +6 -1
- data/lib/etl/control/source/file_source.rb +4 -0
- data/lib/etl/control/source/mysql_streamer.rb +31 -0
- data/lib/etl/engine.rb +40 -20
- data/lib/etl/parser/{delimited_parser.rb → csv_parser.rb} +3 -3
- data/lib/etl/parser/excel_parser.rb +1 -1
- data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
- data/lib/etl/processor/bulk_import_processor.rb +11 -0
- data/lib/etl/processor/check_exist_processor.rb +6 -6
- data/lib/etl/processor/check_unique_processor.rb +4 -0
- data/lib/etl/processor/database_join_processor.rb +25 -4
- data/lib/etl/processor/encode_processor.rb +0 -2
- data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
- data/lib/etl/processor/imapattachment_downloader_processor.rb +2 -2
- data/lib/etl/processor/pop3attachment_downloader_processor.rb +2 -2
- data/lib/etl/processor/row_processor.rb +10 -0
- data/lib/etl/processor/sftp_downloader_processor.rb +1 -1
- data/lib/etl/processor/sftp_uploader_processor.rb +1 -1
- data/lib/etl/processor/truncate_processor.rb +4 -1
- data/lib/etl/processor/zip_file_processor.rb +1 -1
- data/lib/etl/transform/foreign_key_lookup_transform.rb +57 -15
- data/lib/etl/transform/md5_transform.rb +13 -0
- data/lib/etl/transform/{string_to_datetime_transform.rb → string_to_date_time_transform.rb} +0 -0
- data/lib/etl/version.rb +1 -1
- data/test/.gitignore +0 -1
- data/test/check_exist_processor_test.rb +89 -0
- data/test/check_unique_processor_test.rb +40 -0
- data/test/config/.gitignore +1 -0
- data/test/config/database.yml +28 -0
- data/test/config/{Gemfile.rails-3.0.x → gemfiles/Gemfile.rails-3.0.x} +1 -1
- data/test/config/{Gemfile.rails-2.3.x → gemfiles/Gemfile.rails-3.1.x} +1 -1
- data/test/config/gemfiles/Gemfile.rails-3.2.x +3 -0
- data/test/config/gemfiles/common.rb +29 -0
- data/test/control_test.rb +2 -2
- data/test/data/nokogiri.xml +38 -0
- data/test/database_join_processor_test.rb +43 -0
- data/test/delimited.ctl +1 -1
- data/test/delimited_absolute.ctl +1 -3
- data/test/delimited_destination_db.ctl +1 -3
- data/test/delimited_excel.ctl +1 -1
- data/test/delimited_insert_update.ctl +1 -1
- data/test/delimited_update.ctl +1 -1
- data/test/delimited_with_bulk_load.ctl +2 -2
- data/test/destination_test.rb +0 -4
- data/test/encode_processor_test.rb +2 -0
- data/test/engine_test.rb +65 -19
- data/test/ensure_fields_presence_processor_test.rb +33 -0
- data/test/foreign_key_lookup_transform_test.rb +50 -0
- data/test/multiple_delimited.ctl +1 -1
- data/test/multiple_source_delimited.ctl +2 -2
- data/test/nokogiri_all.ctl +35 -0
- data/test/nokogiri_select.ctl +35 -0
- data/test/nokogiri_test.rb +35 -0
- data/test/parser_test.rb +2 -2
- data/test/performance/delimited.ctl +1 -1
- data/test/processor_test.rb +0 -3
- data/test/scd_test.rb +2 -8
- data/test/scd_test_type_1.ctl +1 -1
- data/test/scd_test_type_2.ctl +1 -1
- data/test/screen_test.rb +2 -3
- data/test/source_test.rb +19 -6
- data/test/test_helper.rb +6 -8
- data/test/truncate_processor_test.rb +37 -0
- metadata +121 -144
- data/README +0 -101
- data/active_support_logger.patch +0 -78
- data/test-matrix.yml +0 -10
- data/test/config/Gemfile.rails-2.3.x.lock +0 -38
- data/test/config/Gemfile.rails-3.0.x.lock +0 -49
- data/test/config/common.rb +0 -21
- data/test/connection/mysql/connection.rb +0 -9
- data/test/connection/mysql/schema.sql +0 -36
- data/test/connection/postgresql/connection.rb +0 -13
- data/test/connection/postgresql/schema.sql +0 -39
- data/test/vendor/adapter_extensions-0.5.0/CHANGELOG +0 -26
- data/test/vendor/adapter_extensions-0.5.0/LICENSE +0 -16
- data/test/vendor/adapter_extensions-0.5.0/README +0 -7
- data/test/vendor/adapter_extensions-0.5.0/Rakefile +0 -158
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions.rb +0 -12
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/abstract_adapter.rb +0 -44
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/mysql_adapter.rb +0 -63
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/postgresql_adapter.rb +0 -52
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/sqlserver_adapter.rb +0 -44
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/version.rb +0 -10
data/lib/etl/commands/etl.rb
CHANGED
data/lib/etl/control/control.rb
CHANGED
@@ -94,16 +94,7 @@ module ETL #:nodoc:
 
       # Get the order of elements from the source order
      def order_from_source
-        order = []
-        control.sources.first.definition.each do |item|
-          case item
-          when Hash
-            order << item[:name]
-          else
-            order << item
-          end
-        end
-        order
+        control.sources.first.order
      end
 
      # Return true if the row is allowed. The row will not be allowed if the
@@ -135,7 +126,7 @@
 
      # returns the fields that are required to identify an SCD
      def scd_required_fields
-        if scd?
+        if scd? and scd_type == 2
          [scd_effective_date_field, scd_end_date_field, scd_latest_version_field]
        else
          []
@@ -249,7 +240,7 @@
        when Symbol
          generator = generators[key] ||= ETL::Generator::Generator.class_for_name(value).new(options)
          row[key] = generator.next
-        when Proc
+        when Proc, Method
          row[key] = value.call(row)
        else
          if value.is_a?(ETL::Generator::Generator)
@@ -288,12 +279,10 @@
        statement = []
        values = []
        natural_key.each do |nk|
-          statement << "#{nk} = ?"
-          values << row[nk]
+          statement << "#{nk} = #{ActiveRecord::Base.send(:quote_bound_value, row[nk], connection)}"
        end
        statement = statement.join(" AND ")
-
-        return x
+        return statement
      end
 
      # Do all the steps required when a SCD *has* changed. Exact steps
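The `when Proc, Method` change above means a virtual field in a control file can now be computed by a bound Method object as well as a Proc. A minimal sketch of both styles, assuming the usual :virtual mapping option; the helper object, field names and file name are hypothetical, not part of the gem:

  # Hypothetical control-file fragment. Both values below are resolved by the
  # case statement in the diff above: the Proc as before, the Method thanks to
  # the "when Proc, Method" change.
  name_helper = Object.new
  def name_helper.upcase_name(row)
    row[:name].to_s.upcase
  end

  destination :out, { :file => 'people.csv' },
    :order   => [:id, :name, :display_name, :loaded_at],
    :virtual => {
      :loaded_at    => Proc.new { |row| Time.now },        # Proc (already supported)
      :display_name => name_helper.method(:upcase_name)    # Method (newly supported)
    }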
@@ -0,0 +1,122 @@
+# This source file contains the ETL::Control::CsvDestination
+
+module ETL #:nodoc:
+  module Control #:nodoc:
+    # CSV File as the final destination.
+    class CsvDestination < Destination
+      # The File to write to
+      attr_reader :file
+
+      # The output order
+      attr_reader :order
+
+      # Flag which indicates to append (default is to overwrite)
+      attr_accessor :append
+
+      # The separator
+      attr_accessor :separator
+
+      # The end of line marker
+      attr_accessor :eol
+
+      # The enclosure character
+      attr_accessor :enclose
+
+      # Initialize the object.
+      # * <tt>control</tt>: The Control object
+      # * <tt>configuration</tt>: The configuration map
+      # * <tt>mapping</tt>: The output mapping
+      #
+      # Configuration options:
+      # * <tt>:file<tt>: The file to write to (REQUIRED)
+      # * <tt>:append</tt>: Set to true to append to the file (default is to overwrite)
+      # * <tt>:separator</tt>: Record separator (default is a comma)
+      # * <tt>:eol</tt>: End of line marker (default is \n)
+      # * <tt>:enclose</tt>: Set to true of false
+      # * <tt>:unique</tt>: Set to true to only write unique records
+      # * <tt>:append_rows</tt>: Array of rows to append
+      #
+      # Mapping options:
+      # * <tt>:order</tt>: The order array
+      def initialize(control, configuration, mapping={})
+        super
+        path = Pathname.new(configuration[:file])
+        @file = path.absolute? ? path : Pathname.new(File.dirname(File.expand_path(control.file))) + path
+        @append = configuration[:append] ||= false
+        @separator = configuration[:separator] ||= ','
+        @eol = configuration[:eol] ||= "\n"
+        @enclose = true & configuration[:enclose]
+        @unique = configuration[:unique] ? configuration[:unique] + scd_required_fields : configuration[:unique]
+        @unique.uniq! unless @unique.nil?
+        @write_header = configuration[:write_header]
+        @order = mapping[:order] + scd_required_fields if mapping[:order]
+        @order.uniq! unless @order.nil?
+      end
+
+      def order
+        @order ||= order_from_source
+      end
+
+      # Close the destination. This will flush the buffer and close the underlying stream or connection.
+      def close
+        buffer << append_rows if append_rows
+        flush
+        f.close
+      end
+
+      # Flush the destination buffer
+      def flush
+        #puts "Flushing buffer (#{file}) with #{buffer.length} rows"
+        if @write_header && !@header_written
+          f << order
+          @header_written = true
+        end
+
+        buffer.flatten.each do |row|
+          #puts "row change type: #{row.change_type}"
+          # check to see if this row's compound key constraint already exists
+          # note that the compound key constraint may not utilize virtual fields
+          next unless row_allowed?(row)
+
+          # add any virtual fields
+          add_virtuals!(row)
+
+          # collect all of the values using the order designated in the configuration
+          values = order.collect do |name|
+            value = row[name]
+            case value
+            when Date, Time, DateTime
+              value.to_s(:db)
+            else
+              value.to_s
+            end
+          end
+
+          f << values
+        end
+        f.flush
+        buffer.clear
+        #puts "After flush there are #{buffer.length} rows"
+      end
+
+      private
+      # Get the open file stream
+      def f
+        @f ||= FasterCSV.open(file, mode, options)
+      end
+
+      def options
+        @options ||= {
+          :col_sep => separator,
+          :row_sep => eol,
+          :force_quotes => enclose
+        }
+      end
+
+      # Get the appropriate mode to open the file stream
+      def mode
+        append ? 'a' : 'w'
+      end
+    end
+  end
+end
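A hedged sketch of how the new destination might be declared in a control file. The field names and file name are placeholders, the :type key is an assumption about how the DSL selects this class, and the remaining keys mirror the configuration options documented in the code above:

  # Illustrative control-file fragment for CsvDestination (names are placeholders).
  destination :out, {
      :type         => :csv,           # assumed destination-selection key
      :file         => 'people.csv',   # required
      :append       => false,          # overwrite by default
      :separator    => ',',
      :eol          => "\n",
      :enclose      => true,           # force_quotes in the underlying CSV writer
      :write_header => true
    },
    {
      :order => [:id, :name, :updated_at]
    }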
@@ -74,11 +74,14 @@
       res = conn.execute(q, "Select row #{current_row}")
       none = true
 
-      case conn
-      when ActiveRecord::ConnectionAdapters::PostgreSQLAdapter
+      case conn.class.name
+      when "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
        res.each { none = false }
-      when ActiveRecord::ConnectionAdapters::MysqlAdapter
+      when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
        res.each_hash { none = false }
+        res.free
+      when "ActiveRecord::ConnectionAdapters::Mysql2Adapter"
+        res.each { none = false }
      else raise "Unsupported adapter #{conn.class} for this destination"
      end
 
@@ -0,0 +1,74 @@
+require 'yaml'
+
+module ETL #:nodoc:
+  module Control #:nodoc:
+    class YamlDestination < Destination
+      attr_reader :file, :append, :only, :except
+      # Initialize the object.
+      # * <tt>control</tt>: The Control object
+      # * <tt>configuration</tt>: The configuration map
+      # * <tt>mapping</tt>: The output mapping
+      #
+      # Configuration options:
+      # * <tt>:file<tt>: The file to write to (REQUIRED)
+      # * <tt>:append</tt>: Set to true to append to the file (default is to overwrite)
+      # * <tt>:only</tt>
+      # * <tt>:except</tt>
+      def initialize(control, configuration, mapping={})
+        super
+        @file = File.join(File.dirname(control.file), configuration[:file])
+        @append = configuration[:append] ||= false
+        @only = configuration[:only]
+        @except = configuration[:except]
+        raise ControlError, "the :only and :except options must be used seperately, do not specify both" if @only && @except
+      end
+
+      # Close the destination. This will flush the buffer and close the underlying stream or connection.
+      def close
+        flush
+        f.close
+      end
+
+      # Flush the destination buffer
+      def flush
+        #puts "Flushing buffer (#{file}) with #{buffer.length} rows"
+        buffer.flatten.each do |row|
+          # check to see if this row's compound key constraint already exists
+          # note that the compound key constraint may not utilize virtual fields
+          next unless row_allowed?(row)
+          # add any virtual fields
+          add_virtuals!(row)
+
+          yaml = {}
+          row.each do |key, value|
+            next if only && !only.include?(key)
+            next if except && except.include?(key)
+
+            case value
+            when Date, Time, DateTime
+              value = value.to_s(:db)
+            end
+
+            yaml[key] = value
+          end
+
+          # write the values
+          YAML.dump(yaml, f)
+        end
+        f.flush
+        buffer.clear
+      end
+
+      private
+      # Get the open file stream
+      def f
+        @f ||= File.open(file, mode)
+      end
+
+      # Get the appropriate mode to open the file stream
+      def mode
+        append ? 'a' : 'w'
+      end
+    end
+  end
+end
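Similarly, a hedged sketch of declaring the YAML destination; the :type key is again an assumption about how the DSL resolves the class, and :only/:except are the mutually exclusive field filters enforced in the code above:

  # Illustrative control-file fragment for YamlDestination (names are placeholders).
  destination :out, {
      :type   => :yaml,              # assumed destination-selection key
      :file   => 'people.yml',
      :append => false,
      :only   => [:id, :name]        # or :except => [...], but not both
    }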
data/lib/etl/control/source.rb
CHANGED
@@ -40,8 +40,7 @@ module ETL #:nodoc:
       @configuration = configuration
       @definition = definition
 
-      @store_locally = true
-      @store_locally = configuration[:store_locally] unless configuration[:store_locally].nil?
+      @store_locally = configuration[:store_locally].nil? ? true : configuration[:store_locally]
     end
 
     # Get an array of errors that occur during reading from the source
@@ -87,9 +86,32 @@
       File.join(local_directory, File.basename(last_local_file_trigger, '.trig'))
     end
 
-    # Get the last local file trigger
+    # Get the last local file trigger filename using timestamp in filenames.
+    # Filename is in the format YYYYMMDDHHMMSS.csv.trig, but in the case of a
+    # file source there is an unpadded sequence number before the file
+    # extension. This code may not return the correct "last" file in that
+    # case (in particular when there are 10 or more source files). However,
+    # at this point only the database source calls the method, and it wouldn't
+    # make sense for a file source to use it if multiple files are expected
     def last_local_file_trigger
-
+      trig_files = []
+      trig_ext = '.csv.trig'
+
+      # Store the basename (without extension) of all files that end in the
+      # desired extension
+      Dir.glob(File.join(local_directory, "*" + trig_ext)) do |f|
+        # Extract the basename of each file with the extension snipped off
+        trig_files << File.basename(f, trig_ext) if File.file?(f)
+      end
+
+      # Throw an exception if no trigger files are available
+      raise "Local cache trigger file not found" if trig_files.empty?
+
+      # Sort trigger file strings and get the last one
+      last_trig = trig_files.sort {|a,b| a <=> b}.last
+
+      # Return the file path including extension
+      File.join(local_directory, last_trig + trig_ext)
     end
 
     # Get the local trigger file that is used to indicate that the file has
@@ -103,6 +125,19 @@
       Engine.read_locally
     end
 
+    # Get the order of fields that this source will present to the pipeline
+    def order
+      order = []
+      definition.each do |item|
+        case item
+        when Hash
+          order << item[:name]
+        else
+          order << item
+        end
+      end
+      order
+    end
   end
 end
 end
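The comment block above leans on the fact that YYYYMMDDHHMMSS basenames sort the same lexically as chronologically, so a plain string sort picks the newest trigger; a small worked example (file names invented for illustration):

  # Lexical sort of timestamped basenames is also chronological order,
  # so sort.last yields the newest local cache trigger file.
  names = %w[20120110093000.csv 20120229203554.csv 20111231235959.csv]
  names.sort.last   # => "20120229203554.csv"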
@@ -203,7 +203,12 @@
       end
 
       def query_rows
-        @query_rows
+        return @query_rows if @query_rows
+        if (configuration[:mysqlstream] == true)
+          MySqlStreamer.new(query,@target)
+        else
+          connection.select_all(query)
+        end
       end
 
       # Get the database connection to use
@@ -0,0 +1,31 @@
+require 'open3'
+
+class MySqlStreamer
+
+  def initialize(query, target)
+    @query = query
+    @name = target
+  end
+
+  def each
+    puts "Using the Streaming MySQL from the command line"
+    keys = nil
+    connection_configuration = ETL::Base.configurations[@name.to_s]
+    mysql_command = """mysql --quick -h #{connection_configuration["host"]} -u #{connection_configuration["username"]} -e \"#{@query.gsub("\n","")}\" -D #{connection_configuration["database"]} --password=#{connection_configuration["password"]} -B"""
+    Open3.popen3(mysql_command) do |stdin, out, err, external|
+      until (line = out.gets).nil? do
+        line = line.gsub("\n","")
+        if keys.nil?
+          keys = line.split("\t")
+        else
+          hash = Hash[keys.zip(line.split("\t"))]
+          yield hash
+        end
+      end
+      error = err.gets
+      if (!error.nil? && error.strip.length > 0)
+        throw error
+      end
+    end
+  end
+end
data/lib/etl/engine.rb
CHANGED
@@ -84,6 +84,9 @@ module ETL #:nodoc:
       Time.now.strftime("%Y%m%d%H%M%S")
     end
 
+    # exit code to be passed to the command line
+    attr_accessor :exit_code
+
     # The current source
     attr_accessor :current_source
 
@@ -200,9 +203,11 @@
 
     # Establish the named connection and return the database specific connection
     def establish_connection(name)
+      raise ETL::ETLError, "Connection with no name requested. Is there a missing :target parameter somewhere?" if name.blank?
+
       logger.debug "Establishing connection to #{name}"
       conn_config = ETL::Base.configurations[name.to_s]
-      raise ETL::ETLError, "
+      raise ETL::ETLError, "Cannot find connection named #{name.inspect}" unless conn_config
       connection_method = "#{conn_config['adapter']}_connection"
       ETL::Base.send(connection_method, conn_config)
     end
@@ -255,15 +260,18 @@
     # * ETL::Batch::Batch instance
     def process(file)
       case file
+      when String
+        process(File.new(file))
+      when File
+        case file.path
+        when /.ctl$/; process_control(file)
+        when /.ebf$/; process_batch(file)
+        else raise RuntimeError, "Unsupported file type - #{file.path}"
+        end
+      when ETL::Control::Control
+        process_control(file)
+      when ETL::Batch::Batch
+        process_batch(file)
       else
        raise RuntimeError, "Process object must be a String, File, Control
        instance or Batch instance"
@@ -338,14 +346,15 @@
       control.after_read_processors.each do |processor|
         processed_rows = []
         rows.each do |row|
-          processed_rows << processor.process(row)
+          processed_rows << processor.process(row) unless empty_row?(row)
         end
-        rows = processed_rows.flatten
+        rows = processed_rows.flatten.compact
       end
     rescue => e
       msg = "Error processing rows after read from #{Engine.current_source} on line #{Engine.current_source_row}: #{e}"
       errors << msg
       Engine.logger.error(msg)
+      e.backtrace.each { |line| Engine.logger.error(line) }
       exceeded_error_threshold?(control) ? break : next
     end
   end
@@ -355,9 +364,12 @@
     begin
       Engine.logger.debug "Executing transforms"
       rows.each do |row|
+        # only do the transform if there is a row
+        unless empty_row?(row)
+          control.transforms.each do |transform|
+            name = transform.name.to_sym
+            row[name] = transform.transform(name, row[name], row)
+          end
         end
       end
     rescue ResolverError => e
@@ -384,7 +396,9 @@
       Engine.logger.debug "Processing before write"
       control.before_write_processors.each do |processor|
         processed_rows = []
-        rows.each
+        rows.each do |row|
+          processed_rows << processor.process(row) unless empty_row?(row)
+        end
        rows = processed_rows.flatten.compact
      end
    rescue => e
@@ -420,7 +434,7 @@
 
       if exceeded_error_threshold?(control)
         say_on_own_line "Exiting due to exceeding error threshold: #{control.error_threshold}"
-
+        ETL::Engine.exit_code = 1
       end
 
     end
@@ -434,7 +448,7 @@
       execute_screens(control)
     rescue FatalScreenError => e
       say "Fatal screen error during job execution: #{e.message}"
-
+      ETL::Engine.exit_code = 2
     rescue ScreenError => e
       say "Screen error during job execution: #{e.message}"
       return
@@ -456,7 +470,7 @@
       execute_screens(control, :after_post_process)
     rescue FatalScreenError => e
       say "Fatal screen error during job execution: #{e.message}"
-
+      ETL::Engine.exit_code = 3
     rescue ScreenError => e
       say "Screen error during job execution: #{e.message}"
       return
@@ -477,12 +491,18 @@
       # ETL::Transform::Transform.benchmarks.each do |klass, t|
       #   say "Avg #{klass}: #{Engine.rows_read/t} rows/sec"
       # end
-
+
+      ActiveRecord::Base.verify_active_connections!
       ETL::Engine.job.completed_at = Time.now
       ETL::Engine.job.status = (errors.length > 0 ? 'completed with errors' : 'completed')
       ETL::Engine.job.save!
     end
 
+    def empty_row?(row)
+      # unsure about why it should respond to :[] - keeping it just in case for the moment
+      row.nil? || !row.respond_to?(:[])
+    end
+
     private
     # Return true if the error threshold is exceeded
     def exceeded_error_threshold?(control)
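Because exit_code is a class-level accessor, a driver script can hand it back to the shell once a run finishes; a minimal sketch, assuming the engine is driven roughly the way the bundled etl command does (the control file name is a placeholder):

  # Hypothetical driver: run a control file, then exit with the code recorded
  # by the engine (1 = error threshold exceeded, 2/3 = fatal screen errors,
  # nil when the run succeeded).
  require 'etl'

  ETL::Engine.init
  ETL::Engine.process('load_people.ctl')
  exit(ETL::Engine.exit_code || 0)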