activewarehouse-etl 0.9.5.rc1 → 1.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. data/.standalone_migrations +2 -0
  2. data/.travis.yml +15 -0
  3. data/CHANGELOG +10 -1
  4. data/HOW_TO_RELEASE +4 -0
  5. data/LICENSE +1 -1
  6. data/README.textile +111 -0
  7. data/Rakefile +37 -78
  8. data/activewarehouse-etl.gemspec +7 -4
  9. data/db/migrate/20120229203554_create_tables.rb +37 -0
  10. data/db/schema.rb +45 -0
  11. data/examples/database.example.yml +3 -3
  12. data/lib/etl.rb +16 -0
  13. data/lib/etl/commands/etl.rb +1 -0
  14. data/lib/etl/control/control.rb +1 -1
  15. data/lib/etl/control/destination.rb +5 -16
  16. data/lib/etl/control/destination/csv_destination.rb +122 -0
  17. data/lib/etl/control/destination/excel_destination.rb +1 -1
  18. data/lib/etl/control/destination/insert_update_database_destination.rb +6 -3
  19. data/lib/etl/control/destination/yaml_destination.rb +74 -0
  20. data/lib/etl/control/source.rb +39 -4
  21. data/lib/etl/control/source/database_source.rb +6 -1
  22. data/lib/etl/control/source/file_source.rb +4 -0
  23. data/lib/etl/control/source/mysql_streamer.rb +31 -0
  24. data/lib/etl/engine.rb +40 -20
  25. data/lib/etl/parser/{delimited_parser.rb → csv_parser.rb} +3 -3
  26. data/lib/etl/parser/excel_parser.rb +1 -1
  27. data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
  28. data/lib/etl/processor/bulk_import_processor.rb +11 -0
  29. data/lib/etl/processor/check_exist_processor.rb +6 -6
  30. data/lib/etl/processor/check_unique_processor.rb +4 -0
  31. data/lib/etl/processor/database_join_processor.rb +25 -4
  32. data/lib/etl/processor/encode_processor.rb +0 -2
  33. data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
  34. data/lib/etl/processor/imapattachment_downloader_processor.rb +2 -2
  35. data/lib/etl/processor/pop3attachment_downloader_processor.rb +2 -2
  36. data/lib/etl/processor/row_processor.rb +10 -0
  37. data/lib/etl/processor/sftp_downloader_processor.rb +1 -1
  38. data/lib/etl/processor/sftp_uploader_processor.rb +1 -1
  39. data/lib/etl/processor/truncate_processor.rb +4 -1
  40. data/lib/etl/processor/zip_file_processor.rb +1 -1
  41. data/lib/etl/transform/foreign_key_lookup_transform.rb +57 -15
  42. data/lib/etl/transform/md5_transform.rb +13 -0
  43. data/lib/etl/transform/{string_to_datetime_transform.rb → string_to_date_time_transform.rb} +0 -0
  44. data/lib/etl/version.rb +1 -1
  45. data/test/.gitignore +0 -1
  46. data/test/check_exist_processor_test.rb +89 -0
  47. data/test/check_unique_processor_test.rb +40 -0
  48. data/test/config/.gitignore +1 -0
  49. data/test/config/database.yml +28 -0
  50. data/test/config/{Gemfile.rails-3.0.x → gemfiles/Gemfile.rails-3.0.x} +1 -1
  51. data/test/config/{Gemfile.rails-2.3.x → gemfiles/Gemfile.rails-3.1.x} +1 -1
  52. data/test/config/gemfiles/Gemfile.rails-3.2.x +3 -0
  53. data/test/config/gemfiles/common.rb +29 -0
  54. data/test/control_test.rb +2 -2
  55. data/test/data/nokogiri.xml +38 -0
  56. data/test/database_join_processor_test.rb +43 -0
  57. data/test/delimited.ctl +1 -1
  58. data/test/delimited_absolute.ctl +1 -3
  59. data/test/delimited_destination_db.ctl +1 -3
  60. data/test/delimited_excel.ctl +1 -1
  61. data/test/delimited_insert_update.ctl +1 -1
  62. data/test/delimited_update.ctl +1 -1
  63. data/test/delimited_with_bulk_load.ctl +2 -2
  64. data/test/destination_test.rb +0 -4
  65. data/test/encode_processor_test.rb +2 -0
  66. data/test/engine_test.rb +65 -19
  67. data/test/ensure_fields_presence_processor_test.rb +33 -0
  68. data/test/foreign_key_lookup_transform_test.rb +50 -0
  69. data/test/multiple_delimited.ctl +1 -1
  70. data/test/multiple_source_delimited.ctl +2 -2
  71. data/test/nokogiri_all.ctl +35 -0
  72. data/test/nokogiri_select.ctl +35 -0
  73. data/test/nokogiri_test.rb +35 -0
  74. data/test/parser_test.rb +2 -2
  75. data/test/performance/delimited.ctl +1 -1
  76. data/test/processor_test.rb +0 -3
  77. data/test/scd_test.rb +2 -8
  78. data/test/scd_test_type_1.ctl +1 -1
  79. data/test/scd_test_type_2.ctl +1 -1
  80. data/test/screen_test.rb +2 -3
  81. data/test/source_test.rb +19 -6
  82. data/test/test_helper.rb +6 -8
  83. data/test/truncate_processor_test.rb +37 -0
  84. metadata +121 -144
  85. data/README +0 -101
  86. data/active_support_logger.patch +0 -78
  87. data/test-matrix.yml +0 -10
  88. data/test/config/Gemfile.rails-2.3.x.lock +0 -38
  89. data/test/config/Gemfile.rails-3.0.x.lock +0 -49
  90. data/test/config/common.rb +0 -21
  91. data/test/connection/mysql/connection.rb +0 -9
  92. data/test/connection/mysql/schema.sql +0 -36
  93. data/test/connection/postgresql/connection.rb +0 -13
  94. data/test/connection/postgresql/schema.sql +0 -39
  95. data/test/vendor/adapter_extensions-0.5.0/CHANGELOG +0 -26
  96. data/test/vendor/adapter_extensions-0.5.0/LICENSE +0 -16
  97. data/test/vendor/adapter_extensions-0.5.0/README +0 -7
  98. data/test/vendor/adapter_extensions-0.5.0/Rakefile +0 -158
  99. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions.rb +0 -12
  100. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/abstract_adapter.rb +0 -44
  101. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/mysql_adapter.rb +0 -63
  102. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/postgresql_adapter.rb +0 -52
  103. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/sqlserver_adapter.rb +0 -44
  104. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/version.rb +0 -10
@@ -80,6 +80,7 @@ def execute
80
80
  ARGV.each do |f|
81
81
  ETL::Engine.realtime_activity = true
82
82
  ETL::Engine.process(f)
83
+ exit(ETL::Engine.exit_code) if ETL::Engine.exit_code
83
84
  end
84
85
 
85
86
  puts "ETL process complete\n\n"
@@ -283,7 +283,7 @@ module ETL #:nodoc:
283
283
  end
284
284
 
285
285
  def parse_text(text)
286
- control = ETL::Control::Control.new(nil)
286
+ control = ETL::Control::Control.new('no-file')
287
287
  eval(text, Context.create(control), 'inline')
288
288
  control.validate
289
289
  control
@@ -94,16 +94,7 @@ module ETL #:nodoc:
94
94
 
95
95
  # Get the order of elements from the source order
96
96
  def order_from_source
97
- order = []
98
- control.sources.first.definition.each do |item|
99
- case item
100
- when Hash
101
- order << item[:name]
102
- else
103
- order << item
104
- end
105
- end
106
- order
97
+ control.sources.first.order
107
98
  end
108
99
 
109
100
  # Return true if the row is allowed. The row will not be allowed if the
@@ -135,7 +126,7 @@ module ETL #:nodoc:
135
126
 
136
127
  # returns the fields that are required to identify an SCD
137
128
  def scd_required_fields
138
- if scd?
129
+ if scd? and scd_type == 2
139
130
  [scd_effective_date_field, scd_end_date_field, scd_latest_version_field]
140
131
  else
141
132
  []
@@ -249,7 +240,7 @@ module ETL #:nodoc:
249
240
  when Symbol
250
241
  generator = generators[key] ||= ETL::Generator::Generator.class_for_name(value).new(options)
251
242
  row[key] = generator.next
252
- when Proc
243
+ when Proc, Method
253
244
  row[key] = value.call(row)
254
245
  else
255
246
  if value.is_a?(ETL::Generator::Generator)
@@ -288,12 +279,10 @@ module ETL #:nodoc:
288
279
  statement = []
289
280
  values = []
290
281
  natural_key.each do |nk|
291
- statement << "#{nk} = ?"
292
- values << row[nk]
282
+ statement << "#{nk} = #{ActiveRecord::Base.send(:quote_bound_value, row[nk], connection)}"
293
283
  end
294
284
  statement = statement.join(" AND ")
295
- x=ActiveRecord::Base.send(:sanitize_sql_array, [statement, *values])
296
- return x
285
+ return statement
297
286
  end
298
287
 
299
288
  # Do all the steps required when a SCD *has* changed. Exact steps
@@ -0,0 +1,122 @@
1
+ # This source file contains the ETL::Control::CsvDestination
2
+
3
+ module ETL #:nodoc:
4
+ module Control #:nodoc:
5
+ # CSV File as the final destination.
6
+ class CsvDestination < Destination
7
+ # The File to write to
8
+ attr_reader :file
9
+
10
+ # The output order
11
+ attr_reader :order
12
+
13
+ # Flag which indicates to append (default is to overwrite)
14
+ attr_accessor :append
15
+
16
+ # The separator
17
+ attr_accessor :separator
18
+
19
+ # The end of line marker
20
+ attr_accessor :eol
21
+
22
+ # The enclosure character
23
+ attr_accessor :enclose
24
+
25
+ # Initialize the object.
26
+ # * <tt>control</tt>: The Control object
27
+ # * <tt>configuration</tt>: The configuration map
28
+ # * <tt>mapping</tt>: The output mapping
29
+ #
30
+ # Configuration options:
31
+ # * <tt>:file</tt>: The file to write to (REQUIRED)
32
+ # * <tt>:append</tt>: Set to true to append to the file (default is to overwrite)
33
+ # * <tt>:separator</tt>: Record separator (default is a comma)
34
+ # * <tt>:eol</tt>: End of line marker (default is \n)
35
+ # * <tt>:enclose</tt>: Set to true or false
36
+ # * <tt>:unique</tt>: Set to true to only write unique records
37
+ # * <tt>:append_rows</tt>: Array of rows to append
38
+ #
39
+ # Mapping options:
40
+ # * <tt>:order</tt>: The order array
41
+ def initialize(control, configuration, mapping={})
42
+ super
43
+ path = Pathname.new(configuration[:file])
44
+ @file = path.absolute? ? path : Pathname.new(File.dirname(File.expand_path(control.file))) + path
45
+ @append = configuration[:append] ||= false
46
+ @separator = configuration[:separator] ||= ','
47
+ @eol = configuration[:eol] ||= "\n"
48
+ @enclose = true & configuration[:enclose]
49
+ @unique = configuration[:unique] ? configuration[:unique] + scd_required_fields : configuration[:unique]
50
+ @unique.uniq! unless @unique.nil?
51
+ @write_header = configuration[:write_header]
52
+ @order = mapping[:order] + scd_required_fields if mapping[:order]
53
+ @order.uniq! unless @order.nil?
54
+ end
55
+
56
+ def order
57
+ @order ||= order_from_source
58
+ end
59
+
60
+ # Close the destination. This will flush the buffer and close the underlying stream or connection.
61
+ def close
62
+ buffer << append_rows if append_rows
63
+ flush
64
+ f.close
65
+ end
66
+
67
+ # Flush the destination buffer
68
+ def flush
69
+ #puts "Flushing buffer (#{file}) with #{buffer.length} rows"
70
+ if @write_header && !@header_written
71
+ f << order
72
+ @header_written = true
73
+ end
74
+
75
+ buffer.flatten.each do |row|
76
+ #puts "row change type: #{row.change_type}"
77
+ # check to see if this row's compound key constraint already exists
78
+ # note that the compound key constraint may not utilize virtual fields
79
+ next unless row_allowed?(row)
80
+
81
+ # add any virtual fields
82
+ add_virtuals!(row)
83
+
84
+ # collect all of the values using the order designated in the configuration
85
+ values = order.collect do |name|
86
+ value = row[name]
87
+ case value
88
+ when Date, Time, DateTime
89
+ value.to_s(:db)
90
+ else
91
+ value.to_s
92
+ end
93
+ end
94
+
95
+ f << values
96
+ end
97
+ f.flush
98
+ buffer.clear
99
+ #puts "After flush there are #{buffer.length} rows"
100
+ end
101
+
102
+ private
103
+ # Get the open file stream
104
+ def f
105
+ @f ||= FasterCSV.open(file, mode, options)
106
+ end
107
+
108
+ def options
109
+ @options ||= {
110
+ :col_sep => separator,
111
+ :row_sep => eol,
112
+ :force_quotes => enclose
113
+ }
114
+ end
115
+
116
+ # Get the appropriate mode to open the file stream
117
+ def mode
118
+ append ? 'a' : 'w'
119
+ end
120
+ end
121
+ end
122
+ end
@@ -1,4 +1,4 @@
1
- require 'spreadsheet'
1
+ optional_require 'spreadsheet'
2
2
 
3
3
  module ETL
4
4
  module Control
@@ -74,11 +74,14 @@ module ETL #:nodoc:
74
74
  res = conn.execute(q, "Select row #{current_row}")
75
75
  none = true
76
76
 
77
- case conn
78
- when ActiveRecord::ConnectionAdapters::PostgreSQLAdapter;
77
+ case conn.class.name
78
+ when "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
79
79
  res.each { none = false }
80
- when ActiveRecord::ConnectionAdapters::MysqlAdapter;
80
+ when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
81
81
  res.each_hash { none = false }
82
+ res.free
83
+ when "ActiveRecord::ConnectionAdapters::Mysql2Adapter"
84
+ res.each { none = false }
82
85
  else raise "Unsupported adapter #{conn.class} for this destination"
83
86
  end
84
87
 
@@ -0,0 +1,74 @@
1
+ require 'yaml'
2
+
3
+ module ETL #:nodoc:
4
+ module Control #:nodoc:
5
+ class YamlDestination < Destination
6
+ attr_reader :file, :append, :only, :except
7
+ # Initialize the object.
8
+ # * <tt>control</tt>: The Control object
9
+ # * <tt>configuration</tt>: The configuration map
10
+ # * <tt>mapping</tt>: The output mapping
11
+ #
12
+ # Configuration options:
13
+ # * <tt>:file</tt>: The file to write to (REQUIRED)
14
+ # * <tt>:append</tt>: Set to true to append to the file (default is to overwrite)
15
+ # * <tt>:only</tt>
16
+ # * <tt>:except</tt>
17
+ def initialize(control, configuration, mapping={})
18
+ super
19
+ @file = File.join(File.dirname(control.file), configuration[:file])
20
+ @append = configuration[:append] ||= false
21
+ @only = configuration[:only]
22
+ @except = configuration[:except]
23
+ raise ControlError, "the :only and :except options must be used seperately, do not specify both" if @only && @except
24
+ end
25
+
26
+ # Close the destination. This will flush the buffer and close the underlying stream or connection.
27
+ def close
28
+ flush
29
+ f.close
30
+ end
31
+
32
+ # Flush the destination buffer
33
+ def flush
34
+ #puts "Flushing buffer (#{file}) with #{buffer.length} rows"
35
+ buffer.flatten.each do |row|
36
+ # check to see if this row's compound key constraint already exists
37
+ # note that the compound key constraint may not utilize virtual fields
38
+ next unless row_allowed?(row)
39
+ # add any virtual fields
40
+ add_virtuals!(row)
41
+
42
+ yaml = {}
43
+ row.each do |key, value|
44
+ next if only && !only.include?(key)
45
+ next if except && except.include?(key)
46
+
47
+ case value
48
+ when Date, Time, DateTime
49
+ value = value.to_s(:db)
50
+ end
51
+
52
+ yaml[key] = value
53
+ end
54
+
55
+ # write the values
56
+ YAML.dump(yaml, f)
57
+ end
58
+ f.flush
59
+ buffer.clear
60
+ end
61
+
62
+ private
63
+ # Get the open file stream
64
+ def f
65
+ @f ||= File.open(file, mode)
66
+ end
67
+
68
+ # Get the appropriate mode to open the file stream
69
+ def mode
70
+ append ? 'a' : 'w'
71
+ end
72
+ end
73
+ end
74
+ end
@@ -40,8 +40,7 @@ module ETL #:nodoc:
40
40
  @configuration = configuration
41
41
  @definition = definition
42
42
 
43
- @store_locally = true
44
- @store_locally = configuration[:store_locally] unless configuration[:store_locally].nil?
43
+ @store_locally = configuration[:store_locally].nil? ? true : configuration[:store_locally]
45
44
  end
46
45
 
47
46
  # Get an array of errors that occur during reading from the source
@@ -87,9 +86,32 @@ module ETL #:nodoc:
87
86
  File.join(local_directory, File.basename(last_local_file_trigger, '.trig'))
88
87
  end
89
88
 
90
- # Get the last local file trigger
89
+ # Get the last local file trigger filename using timestamp in filenames.
90
+ # Filename is in the format YYYYMMDDHHMMSS.csv.trig, but in the case of a
91
+ # file source there is an unpadded sequence number before the file
92
+ # extension. This code may not return the correct "last" file in that
93
+ # case (in particular when there are 10 or more source files). However,
94
+ # at this point only the database source calls the method, and it wouldn't
95
+ # make sense for a file source to use it if multiple files are expected
91
96
  def last_local_file_trigger
92
- Dir.glob(File.join(local_directory, '*.trig')).last
97
+ trig_files = []
98
+ trig_ext = '.csv.trig'
99
+
100
+ # Store the basename (without extension) of all files that end in the
101
+ # desired extension
102
+ Dir.glob(File.join(local_directory, "*" + trig_ext)) do |f|
103
+ # Extract the basename of each file with the extension snipped off
104
+ trig_files << File.basename(f, trig_ext) if File.file?(f)
105
+ end
106
+
107
+ # Throw an exception if no trigger files are available
108
+ raise "Local cache trigger file not found" if trig_files.empty?
109
+
110
+ # Sort trigger file strings and get the last one
111
+ last_trig = trig_files.sort {|a,b| a <=> b}.last
112
+
113
+ # Return the file path including extension
114
+ File.join(local_directory, last_trig + trig_ext)
93
115
  end
94
116
 
95
117
  # Get the local trigger file that is used to indicate that the file has
@@ -103,6 +125,19 @@ module ETL #:nodoc:
103
125
  Engine.read_locally
104
126
  end
105
127
 
128
+ # Get the order of fields that this source will present to the pipeline
129
+ def order
130
+ order = []
131
+ definition.each do |item|
132
+ case item
133
+ when Hash
134
+ order << item[:name]
135
+ else
136
+ order << item
137
+ end
138
+ end
139
+ order
140
+ end
106
141
  end
107
142
  end
108
143
  end
@@ -203,7 +203,12 @@ module ETL #:nodoc:
203
203
  end
204
204
 
205
205
  def query_rows
206
- @query_rows ||= connection.select_all(query)
206
+ return @query_rows if @query_rows
207
+ if (configuration[:mysqlstream] == true)
208
+ MySqlStreamer.new(query,@target)
209
+ else
210
+ connection.select_all(query)
211
+ end
207
212
  end
208
213
 
209
214
  # Get the database connection to use
@@ -53,6 +53,10 @@ module ETL #:nodoc:
53
53
  end
54
54
  end
55
55
 
56
+ def order
57
+ @parser.fields.collect {|field| field.name}
58
+ end
59
+
56
60
  private
57
61
  # Copy source data to a local directory structure
58
62
  def copy_sources
@@ -0,0 +1,31 @@
1
+ require 'open3'
2
+
3
+ class MySqlStreamer
4
+
5
+ def initialize(query, target)
6
+ @query = query
7
+ @name = target
8
+ end
9
+
10
+ def each
11
+ puts "Using the Streaming MySQL from the command line"
12
+ keys = nil
13
+ connection_configuration = ETL::Base.configurations[@name.to_s]
14
+ mysql_command = """mysql --quick -h #{connection_configuration["host"]} -u #{connection_configuration["username"]} -e \"#{@query.gsub("\n","")}\" -D #{connection_configuration["database"]} --password=#{connection_configuration["password"]} -B"""
15
+ Open3.popen3(mysql_command) do |stdin, out, err, external|
16
+ until (line = out.gets).nil? do
17
+ line = line.gsub("\n","")
18
+ if keys.nil?
19
+ keys = line.split("\t")
20
+ else
21
+ hash = Hash[keys.zip(line.split("\t"))]
22
+ yield hash
23
+ end
24
+ end
25
+ error = err.gets
26
+ if (!error.nil? && error.strip.length > 0)
27
+ throw error
28
+ end
29
+ end
30
+ end
31
+ end
@@ -84,6 +84,9 @@ module ETL #:nodoc:
84
84
  Time.now.strftime("%Y%m%d%H%M%S")
85
85
  end
86
86
 
87
+ # exit code to be passed to the command line
88
+ attr_accessor :exit_code
89
+
87
90
  # The current source
88
91
  attr_accessor :current_source
89
92
 
@@ -200,9 +203,11 @@ module ETL #:nodoc:
200
203
 
201
204
  # Establish the named connection and return the database specific connection
202
205
  def establish_connection(name)
206
+ raise ETL::ETLError, "Connection with no name requested. Is there a missing :target parameter somewhere?" if name.blank?
207
+
203
208
  logger.debug "Establishing connection to #{name}"
204
209
  conn_config = ETL::Base.configurations[name.to_s]
205
- raise ETL::ETLError, "No connection found for #{name}" unless conn_config
210
+ raise ETL::ETLError, "Cannot find connection named #{name.inspect}" unless conn_config
206
211
  connection_method = "#{conn_config['adapter']}_connection"
207
212
  ETL::Base.send(connection_method, conn_config)
208
213
  end
@@ -255,15 +260,18 @@ module ETL #:nodoc:
255
260
  # * ETL::Batch::Batch instance
256
261
  def process(file)
257
262
  case file
258
- when String
259
- process(File.new(file))
260
- when File
261
- process_control(file) if file.path =~ /.ctl$/
262
- process_batch(file) if file.path =~ /.ebf$/
263
- when ETL::Control::Control
264
- process_control(file)
265
- when ETL::Batch::Batch
266
- process_batch(file)
263
+ when String
264
+ process(File.new(file))
265
+ when File
266
+ case file.path
267
+ when /.ctl$/; process_control(file)
268
+ when /.ebf$/; process_batch(file)
269
+ else raise RuntimeError, "Unsupported file type - #{file.path}"
270
+ end
271
+ when ETL::Control::Control
272
+ process_control(file)
273
+ when ETL::Batch::Batch
274
+ process_batch(file)
267
275
  else
268
276
  raise RuntimeError, "Process object must be a String, File, Control
269
277
  instance or Batch instance"
@@ -338,14 +346,15 @@ module ETL #:nodoc:
338
346
  control.after_read_processors.each do |processor|
339
347
  processed_rows = []
340
348
  rows.each do |row|
341
- processed_rows << processor.process(row)
349
+ processed_rows << processor.process(row) unless empty_row?(row)
342
350
  end
343
- rows = processed_rows.flatten
351
+ rows = processed_rows.flatten.compact
344
352
  end
345
353
  rescue => e
346
354
  msg = "Error processing rows after read from #{Engine.current_source} on line #{Engine.current_source_row}: #{e}"
347
355
  errors << msg
348
356
  Engine.logger.error(msg)
357
+ e.backtrace.each { |line| Engine.logger.error(line) }
349
358
  exceeded_error_threshold?(control) ? break : next
350
359
  end
351
360
  end
@@ -355,9 +364,12 @@ module ETL #:nodoc:
355
364
  begin
356
365
  Engine.logger.debug "Executing transforms"
357
366
  rows.each do |row|
358
- control.transforms.each do |transform|
359
- name = transform.name.to_sym
360
- row[name] = transform.transform(name, row[name], row)
367
+ # only do the transform if there is a row
368
+ unless empty_row?(row)
369
+ control.transforms.each do |transform|
370
+ name = transform.name.to_sym
371
+ row[name] = transform.transform(name, row[name], row)
372
+ end
361
373
  end
362
374
  end
363
375
  rescue ResolverError => e
@@ -384,7 +396,9 @@ module ETL #:nodoc:
384
396
  Engine.logger.debug "Processing before write"
385
397
  control.before_write_processors.each do |processor|
386
398
  processed_rows = []
387
- rows.each { |row| processed_rows << processor.process(row) }
399
+ rows.each do |row|
400
+ processed_rows << processor.process(row) unless empty_row?(row)
401
+ end
388
402
  rows = processed_rows.flatten.compact
389
403
  end
390
404
  rescue => e
@@ -420,7 +434,7 @@ module ETL #:nodoc:
420
434
 
421
435
  if exceeded_error_threshold?(control)
422
436
  say_on_own_line "Exiting due to exceeding error threshold: #{control.error_threshold}"
423
- return
437
+ ETL::Engine.exit_code = 1
424
438
  end
425
439
 
426
440
  end
@@ -434,7 +448,7 @@ module ETL #:nodoc:
434
448
  execute_screens(control)
435
449
  rescue FatalScreenError => e
436
450
  say "Fatal screen error during job execution: #{e.message}"
437
- exit
451
+ ETL::Engine.exit_code = 2
438
452
  rescue ScreenError => e
439
453
  say "Screen error during job execution: #{e.message}"
440
454
  return
@@ -456,7 +470,7 @@ module ETL #:nodoc:
456
470
  execute_screens(control, :after_post_process)
457
471
  rescue FatalScreenError => e
458
472
  say "Fatal screen error during job execution: #{e.message}"
459
- exit
473
+ ETL::Engine.exit_code = 3
460
474
  rescue ScreenError => e
461
475
  say "Screen error during job execution: #{e.message}"
462
476
  return
@@ -477,12 +491,18 @@ module ETL #:nodoc:
477
491
  # ETL::Transform::Transform.benchmarks.each do |klass, t|
478
492
  # say "Avg #{klass}: #{Engine.rows_read/t} rows/sec"
479
493
  # end
480
-
494
+
495
+ ActiveRecord::Base.verify_active_connections!
481
496
  ETL::Engine.job.completed_at = Time.now
482
497
  ETL::Engine.job.status = (errors.length > 0 ? 'completed with errors' : 'completed')
483
498
  ETL::Engine.job.save!
484
499
  end
485
500
 
501
+ def empty_row?(row)
502
+ # unsure about why it should respond to :[] - keeping it just in case for the moment
503
+ row.nil? || !row.respond_to?(:[])
504
+ end
505
+
486
506
  private
487
507
  # Return true if the error threshold is exceeded
488
508
  def exceeded_error_threshold?(control)