activewarehouse-etl-sgonyea 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data/.gitignore +9 -0
  2. data/0.9-UPGRADE +6 -0
  3. data/CHANGELOG +236 -0
  4. data/Gemfile +4 -0
  5. data/HOW_TO_RELEASE +13 -0
  6. data/LICENSE +7 -0
  7. data/README.textile +111 -0
  8. data/Rakefile +103 -0
  9. data/TODO +28 -0
  10. data/active_support_logger.patch +78 -0
  11. data/activewarehouse-etl.gemspec +36 -0
  12. data/bin/etl +28 -0
  13. data/bin/etl.cmd +8 -0
  14. data/examples/database.example.yml +16 -0
  15. data/lib/etl.rb +97 -0
  16. data/lib/etl/batch.rb +2 -0
  17. data/lib/etl/batch/batch.rb +111 -0
  18. data/lib/etl/batch/directives.rb +65 -0
  19. data/lib/etl/builder.rb +2 -0
  20. data/lib/etl/builder/date_dimension_builder.rb +96 -0
  21. data/lib/etl/builder/time_dimension_builder.rb +31 -0
  22. data/lib/etl/commands/etl.rb +89 -0
  23. data/lib/etl/control.rb +3 -0
  24. data/lib/etl/control/control.rb +405 -0
  25. data/lib/etl/control/destination.rb +438 -0
  26. data/lib/etl/control/destination/csv_destination.rb +113 -0
  27. data/lib/etl/control/destination/database_destination.rb +97 -0
  28. data/lib/etl/control/destination/excel_destination.rb +91 -0
  29. data/lib/etl/control/destination/file_destination.rb +126 -0
  30. data/lib/etl/control/destination/insert_update_database_destination.rb +136 -0
  31. data/lib/etl/control/destination/update_database_destination.rb +109 -0
  32. data/lib/etl/control/destination/yaml_destination.rb +74 -0
  33. data/lib/etl/control/source.rb +132 -0
  34. data/lib/etl/control/source/database_source.rb +224 -0
  35. data/lib/etl/control/source/enumerable_source.rb +11 -0
  36. data/lib/etl/control/source/file_source.rb +90 -0
  37. data/lib/etl/control/source/model_source.rb +39 -0
  38. data/lib/etl/core_ext.rb +1 -0
  39. data/lib/etl/core_ext/time.rb +5 -0
  40. data/lib/etl/core_ext/time/calculations.rb +42 -0
  41. data/lib/etl/engine.rb +582 -0
  42. data/lib/etl/execution.rb +19 -0
  43. data/lib/etl/execution/base.rb +8 -0
  44. data/lib/etl/execution/batch.rb +10 -0
  45. data/lib/etl/execution/job.rb +8 -0
  46. data/lib/etl/execution/migration.rb +90 -0
  47. data/lib/etl/generator.rb +2 -0
  48. data/lib/etl/generator/generator.rb +20 -0
  49. data/lib/etl/generator/surrogate_key_generator.rb +39 -0
  50. data/lib/etl/http_tools.rb +139 -0
  51. data/lib/etl/parser.rb +11 -0
  52. data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
  53. data/lib/etl/parser/csv_parser.rb +93 -0
  54. data/lib/etl/parser/excel_parser.rb +112 -0
  55. data/lib/etl/parser/fixed_width_parser.rb +65 -0
  56. data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
  57. data/lib/etl/parser/parser.rb +41 -0
  58. data/lib/etl/parser/sax_parser.rb +218 -0
  59. data/lib/etl/parser/xml_parser.rb +65 -0
  60. data/lib/etl/processor.rb +11 -0
  61. data/lib/etl/processor/block_processor.rb +14 -0
  62. data/lib/etl/processor/bulk_import_processor.rb +94 -0
  63. data/lib/etl/processor/check_exist_processor.rb +80 -0
  64. data/lib/etl/processor/check_unique_processor.rb +39 -0
  65. data/lib/etl/processor/copy_field_processor.rb +26 -0
  66. data/lib/etl/processor/database_join_processor.rb +82 -0
  67. data/lib/etl/processor/encode_processor.rb +55 -0
  68. data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
  69. data/lib/etl/processor/escape_csv_processor.rb +77 -0
  70. data/lib/etl/processor/filter_row_processor.rb +51 -0
  71. data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
  72. data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
  73. data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
  74. data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
  75. data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
  76. data/lib/etl/processor/print_row_processor.rb +12 -0
  77. data/lib/etl/processor/processor.rb +25 -0
  78. data/lib/etl/processor/rename_processor.rb +24 -0
  79. data/lib/etl/processor/require_non_blank_processor.rb +26 -0
  80. data/lib/etl/processor/row_processor.rb +27 -0
  81. data/lib/etl/processor/sequence_processor.rb +23 -0
  82. data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
  83. data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
  84. data/lib/etl/processor/surrogate_key_processor.rb +53 -0
  85. data/lib/etl/processor/truncate_processor.rb +40 -0
  86. data/lib/etl/processor/zip_file_processor.rb +27 -0
  87. data/lib/etl/row.rb +20 -0
  88. data/lib/etl/screen.rb +14 -0
  89. data/lib/etl/screen/row_count_screen.rb +20 -0
  90. data/lib/etl/transform.rb +2 -0
  91. data/lib/etl/transform/block_transform.rb +13 -0
  92. data/lib/etl/transform/calculation_transform.rb +71 -0
  93. data/lib/etl/transform/date_to_string_transform.rb +20 -0
  94. data/lib/etl/transform/decode_transform.rb +51 -0
  95. data/lib/etl/transform/default_transform.rb +20 -0
  96. data/lib/etl/transform/foreign_key_lookup_transform.rb +211 -0
  97. data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
  98. data/lib/etl/transform/md5_transform.rb +13 -0
  99. data/lib/etl/transform/ordinalize_transform.rb +14 -0
  100. data/lib/etl/transform/sha1_transform.rb +13 -0
  101. data/lib/etl/transform/split_fields_transform.rb +27 -0
  102. data/lib/etl/transform/string_to_date_time_transform.rb +14 -0
  103. data/lib/etl/transform/string_to_date_transform.rb +16 -0
  104. data/lib/etl/transform/string_to_time_transform.rb +11 -0
  105. data/lib/etl/transform/transform.rb +61 -0
  106. data/lib/etl/transform/trim_transform.rb +26 -0
  107. data/lib/etl/transform/type_transform.rb +35 -0
  108. data/lib/etl/util.rb +59 -0
  109. data/lib/etl/version.rb +3 -0
  110. data/test-matrix.yml +10 -0
  111. data/test/.gitignore +1 -0
  112. data/test/.ignore +2 -0
  113. data/test/all.ebf +6 -0
  114. data/test/apache_combined_log.ctl +11 -0
  115. data/test/batch_test.rb +41 -0
  116. data/test/batch_with_error.ebf +6 -0
  117. data/test/batched1.ctl +0 -0
  118. data/test/batched2.ctl +0 -0
  119. data/test/block_processor.ctl +6 -0
  120. data/test/block_processor_error.ctl +1 -0
  121. data/test/block_processor_pre_post_process.ctl +4 -0
  122. data/test/block_processor_remove_rows.ctl +5 -0
  123. data/test/block_processor_test.rb +38 -0
  124. data/test/check_exist_processor_test.rb +92 -0
  125. data/test/check_unique_processor_test.rb +40 -0
  126. data/test/config/Gemfile.rails-2.3.x +3 -0
  127. data/test/config/Gemfile.rails-2.3.x.lock +53 -0
  128. data/test/config/Gemfile.rails-3.0.x +3 -0
  129. data/test/config/Gemfile.rails-3.0.x.lock +61 -0
  130. data/test/config/common.rb +29 -0
  131. data/test/connection/mysql/connection.rb +9 -0
  132. data/test/connection/mysql/schema.sql +37 -0
  133. data/test/connection/postgresql/connection.rb +13 -0
  134. data/test/connection/postgresql/schema.sql +40 -0
  135. data/test/control_test.rb +43 -0
  136. data/test/data/apache_combined_log.txt +3 -0
  137. data/test/data/bulk_import.txt +3 -0
  138. data/test/data/bulk_import_with_empties.txt +3 -0
  139. data/test/data/decode.txt +3 -0
  140. data/test/data/delimited.txt +3 -0
  141. data/test/data/encode_source_latin1.txt +2 -0
  142. data/test/data/excel.xls +0 -0
  143. data/test/data/excel2.xls +0 -0
  144. data/test/data/fixed_width.txt +3 -0
  145. data/test/data/multiple_delimited_1.txt +3 -0
  146. data/test/data/multiple_delimited_2.txt +3 -0
  147. data/test/data/nokogiri.xml +38 -0
  148. data/test/data/people.txt +3 -0
  149. data/test/data/sax.xml +14 -0
  150. data/test/data/xml.xml +16 -0
  151. data/test/database_join_processor_test.rb +43 -0
  152. data/test/date_dimension_builder_test.rb +96 -0
  153. data/test/delimited.ctl +30 -0
  154. data/test/delimited_absolute.ctl +31 -0
  155. data/test/delimited_destination_db.ctl +23 -0
  156. data/test/delimited_excel.ctl +31 -0
  157. data/test/delimited_insert_update.ctl +34 -0
  158. data/test/delimited_update.ctl +34 -0
  159. data/test/delimited_with_bulk_load.ctl +34 -0
  160. data/test/destination_test.rb +275 -0
  161. data/test/directive_test.rb +23 -0
  162. data/test/encode_processor_test.rb +32 -0
  163. data/test/engine_test.rb +78 -0
  164. data/test/ensure_fields_presence_processor_test.rb +28 -0
  165. data/test/errors.ctl +24 -0
  166. data/test/etl_test.rb +42 -0
  167. data/test/excel.ctl +24 -0
  168. data/test/excel2.ctl +25 -0
  169. data/test/fixed_width.ctl +35 -0
  170. data/test/foreign_key_lookup_transform_test.rb +50 -0
  171. data/test/generator_test.rb +14 -0
  172. data/test/inline_parser.ctl +17 -0
  173. data/test/mocks/mock_destination.rb +26 -0
  174. data/test/mocks/mock_source.rb +25 -0
  175. data/test/model_source.ctl +14 -0
  176. data/test/multiple_delimited.ctl +22 -0
  177. data/test/multiple_source_delimited.ctl +39 -0
  178. data/test/nokogiri_all.ctl +35 -0
  179. data/test/nokogiri_select.ctl +35 -0
  180. data/test/nokogiri_test.rb +35 -0
  181. data/test/parser_test.rb +224 -0
  182. data/test/performance/delimited.ctl +30 -0
  183. data/test/processor_test.rb +44 -0
  184. data/test/row_processor_test.rb +17 -0
  185. data/test/sax.ctl +26 -0
  186. data/test/scd/1.txt +1 -0
  187. data/test/scd/2.txt +1 -0
  188. data/test/scd/3.txt +1 -0
  189. data/test/scd_test.rb +257 -0
  190. data/test/scd_test_type_1.ctl +43 -0
  191. data/test/scd_test_type_2.ctl +34 -0
  192. data/test/screen_test.rb +9 -0
  193. data/test/screen_test_error.ctl +3 -0
  194. data/test/screen_test_fatal.ctl +3 -0
  195. data/test/source_test.rb +154 -0
  196. data/test/test_helper.rb +37 -0
  197. data/test/transform_test.rb +101 -0
  198. data/test/truncate_processor_test.rb +37 -0
  199. data/test/xml.ctl +31 -0
  200. metadata +370 -0
@@ -0,0 +1,12 @@
1
module ETL #:nodoc:
  module Processor #:nodoc:
    # Diagnostic row processor: writes the +inspect+ form of every row to
    # standard output and passes the row through untouched.
    class PrintRowProcessor < ETL::Processor::RowProcessor
      # Print the row, then hand the very same row back to the pipeline.
      def process(row)
        $stdout.puts(row.inspect)
        row
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module ETL #:nodoc:
  module Processor #:nodoc:
    # Base class for pre and post processors. Subclasses must implement the
    # +process+ method.
    class Processor
      # Store the control object and the configuration Hash, then run the
      # optional +after_initialize+ hook if the instance defines one.
      #
      # The lookup passes +true+ to +respond_to?+ so that a hook declared
      # private or protected in a subclass is still invoked instead of being
      # silently skipped.
      def initialize(control, configuration)
        @control = control
        @configuration = configuration
        after_initialize if respond_to?(:after_initialize, true)
      end

      protected

      # Get the control object
      attr_reader :control

      # Get the configuration Hash
      attr_reader :configuration

      # Get the engine logger
      def log
        Engine.logger
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module ETL #:nodoc:
  module Processor #:nodoc:
    # Row level processor to rename a field in the row.
    #
    # Configuration options:
    # * <tt>:source</tt>: the source field name
    # * <tt>:dest</tt>: The destination field name
    class RenameProcessor < ETL::Processor::RowProcessor
      # Copy the value stored under <tt>:source</tt> to <tt>:dest</tt>, remove
      # the <tt>:source</tt> key and return the row.
      def process(row)
        source_value = row[configuration[:source]]
        row[configuration[:dest]] =
          case source_value
          when nil, true, false, Numeric, Symbol
            # These values cannot be duplicated on Ruby < 2.4 (+dup+ raises
            # TypeError), so they are assigned as-is.
            source_value
          else
            source_value.dup
          end
        row.delete(configuration[:source])
        row
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
module ETL #:nodoc:
  module Processor #:nodoc:
    # Row processor that keeps a row only when every configured field holds a
    # non-blank value.
    class RequireNonBlankProcessor < ETL::Processor::RowProcessor
      # The list of field names that must be non-blank
      attr_reader :fields

      # Set up the processor.
      #
      # Options:
      # * <tt>:fields</tt>: An array of fields to check, for example:
      #   [:first_name,:last_name]
      def initialize(control, configuration)
        super(control, configuration)
        @fields = configuration[:fields] || []
      end

      # Return the row, or nil as soon as one of the checked fields is blank.
      def process(row)
        rejected = fields.any? { |name| row[name].blank? }
        rejected ? nil : row
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module ETL #:nodoc:
  module Processor #:nodoc:
    # Processor which processes a specific row. Unlike a transformer, which deals with a specific
    # value in the row, row processors can process an entire row at once, which can be used to
    # explode a single row into multiple rows (for example)
    #
    # Construction is inherited unchanged from Processor (control, configuration).
    class RowProcessor < Processor
      # Process the specified row. This method must return the row.
      #
      # Abstract: this base implementation always raises a RuntimeError.
      def process(row)
        raise "process is an abstract method"
      end

      # Ensure a given row keys include all the provided columns
      # and raise an error using the provided message if it doesn't.
      #
      # * +row+: the row whose keys are checked
      # * +columns+: the required keys; nil disables the check
      # * +message+: text appended to the error message
      #
      # Raises ETL::ControlError for the first missing column.
      def ensure_columns_available_in_row!(row, columns, message)
        return if columns.nil?
        columns.each do |column|
          # key? avoids building a fresh keys array for every column checked
          next if row.key?(column)
          raise(ETL::ControlError, "Row missing required field #{column.inspect} #{message}")
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module ETL #:nodoc:
  module Processor #:nodoc:
    # Row level processor that numbers rows with an incrementing sequence.
    #
    # Configuration options:
    # * <tt>:context</tt>: A context name; each distinct name (including none)
    #   gets its own counter, kept on this processor instance
    # * <tt>:dest</tt>: The destination field name
    class SequenceProcessor < ETL::Processor::RowProcessor
      # Advance the counter for the configured context (the first value handed
      # out is 1), store it under <tt>:dest</tt> and return the row.
      def process(row)
        context = configuration[:context]
        next_value = (sequences[context] || 0) + 1
        sequences[context] = next_value
        row[configuration[:dest]] = next_value
        row
      end

      protected

      # Lazily created Hash mapping a context name to its last issued value
      def sequences
        @sequences ||= {}
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ optional_require 'net/sftp'
2
+
3
module ETL
  module Processor
    # Custom processor to download files via SFTP
    class SftpDownloaderProcessor < ETL::Processor::Processor
      attr_reader :host
      attr_reader :port
      attr_reader :remote_dir
      attr_reader :files
      attr_reader :username
      attr_reader :local_dir

      # configuration options include:
      # * host - hostname or IP address of SFTP server (required)
      # * port - port number for SFTP server (default: 22)
      # * remote_dir - remote path on SFTP server (default: /)
      # * files - list of files to download from SFTP server (default: [])
      # * username - username for SFTP server authentication (default: anonymous)
      # * password - password for SFTP server authentication (default: nil)
      # * local_dir - local output directory to save downloaded files
      #   (default: '', meaning the file name is used as given, relative to
      #   the current working directory)
      #
      # As an example you might write something like the following in your control process file:
      #   pre_process :sftp_downloader, {
      #     :host => 'sftp.sec.gov',
      #     :remote_dir => 'edgar/Feed/2007/QTR2',
      #     :files => ['20070402.nc.tar.gz', '20070403.nc.tar.gz', '20070404.nc.tar.gz',
      #                '20070405.nc.tar.gz', '20070406.nc.tar.gz'],
      #     :local_dir => '/data/sec/2007/04',
      #   }
      # The above example will anonymously download via SFTP the first week's worth of SEC filing feed data
      # from the second quarter of 2007 and download the files to the local directory +/data/sec/2007/04+.
      def initialize(control, configuration)
        # Let the base class record control/configuration so the inherited
        # accessors and the after_initialize hook work for this processor too.
        super
        @host = configuration[:host]
        @port = configuration[:port] || 22
        @remote_dir = configuration[:remote_dir] || '/'
        @files = configuration[:files] || []
        @username = configuration[:username] || 'anonymous'
        @password = configuration[:password]
        @local_dir = configuration[:local_dir] || ''
      end

      # Open one SFTP session and download every configured file in order.
      def process
        Net::SFTP.start(@host, @username, {:port => @port, :password => @password}) do |conn|
          @files.each do |f|
            conn.download!(remote_file(f), local_file(f))
          end
        end
      end

      private
      attr_accessor :password

      # Local path for +name+. An empty local_dir must not go through
      # File.join, because File.join('', name) yields "/name" (filesystem root).
      def local_file(name)
        @local_dir.to_s.empty? ? name : File.join(@local_dir, name)
      end

      # Remote path for +name+ below remote_dir
      def remote_file(name)
        File.join(@remote_dir, name)
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ optional_require 'net/sftp'
2
+
3
module ETL
  module Processor
    # Custom processor to upload files via SFTP
    class SftpUploaderProcessor < ETL::Processor::Processor
      attr_reader :host
      attr_reader :port
      attr_reader :remote_dir
      attr_reader :files
      attr_reader :username
      attr_reader :local_dir

      # configuration options include:
      # * host - hostname or IP address of SFTP server (required)
      # * port - port number for SFTP server (default: 22)
      # * remote_dir - remote path on SFTP server to upload into (default: /)
      # * files - list of files to upload to SFTP server (default: [])
      # * username - username for SFTP server authentication (default: anonymous)
      # * password - password for SFTP server authentication (default: nil)
      # * local_dir - local directory holding the files to upload
      #   (default: '', meaning the file name is used as given, relative to
      #   the current working directory)
      #
      # As an example you might write something like the following in your control process file:
      #   post_process :sftp_uploader, {
      #     :host => 'sftp.example.com',
      #     :remote_dir => 'incoming/2007/04',
      #     :files => ['20070402.nc.tar.gz', '20070403.nc.tar.gz'],
      #     :local_dir => '/data/sec/2007/04',
      #   }
      # The above example will upload the two listed files from the local directory
      # +/data/sec/2007/04+ to +incoming/2007/04+ on the server.
      def initialize(control, configuration)
        # Let the base class record control/configuration so the inherited
        # accessors and the after_initialize hook work for this processor too.
        super
        @host = configuration[:host]
        @port = configuration[:port] || 22
        @remote_dir = configuration[:remote_dir] || '/'
        @files = configuration[:files] || []
        @username = configuration[:username] || 'anonymous'
        @password = configuration[:password]
        @local_dir = configuration[:local_dir] || ''
      end

      # Open one SFTP session and upload every configured file in order.
      def process
        Net::SFTP.start(@host, @username, {:port => @port, :password => @password}) do |conn|
          @files.each do |f|
            conn.upload!(local_file(f), remote_file(f))
          end
        end
      end

      private
      attr_accessor :password

      # Local path for +name+. An empty local_dir must not go through
      # File.join, because File.join('', name) yields "/name" (filesystem root).
      def local_file(name)
        @local_dir.to_s.empty? ? name : File.join(@local_dir, name)
      end

      # Remote path for +name+ below remote_dir
      def remote_file(name)
        File.join(@remote_dir, name)
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
module ETL #:nodoc:
  module Processor #:nodoc:
    # A row level processor that provides surrogate keys
    class SurrogateKeyProcessor < ETL::Processor::RowProcessor
      attr_accessor :destination
      attr_accessor :table
      attr_accessor :column
      attr_accessor :target

      # Initialize the surrogate key generator
      #
      # Configuration options
      # * <tt>:table</tt>: If specified, the table whose current maximum key
      #   seeds the generator. Without it the first key issued is 1.
      # * <tt>:column</tt>: The key column read from :table (defaults to 'id')
      # * <tt>:target</tt>: The target connection
      # * <tt>:destination</tt>: The destination column name (defaults to :id)
      # * <tt>:query</tt>: No longer supported; passing it raises ControlError.
      def initialize(control, configuration)
        super
        @table = configuration[:table]
        @column = configuration[:column] || 'id'
        @target = configuration[:target]
        if configuration[:query]
          raise ControlError, "Query option is no longer valid, use :column and :table instead"
        end
        if table
          @surrogate_key = ETL::Engine.connection(target).select_value("SELECT max(#{column}) FROM #{table_name}")
        end
        # A blank result (no table given, or no usable maximum) starts from 0
        @surrogate_key = 0 if @surrogate_key.blank?
        @surrogate_key = @surrogate_key.to_i
        @destination = configuration[:destination] || :id
      end

      # Add a surrogate key to the row.
      #
      # Returns the row with the next key stored under +destination+, or nil
      # (without consuming a key) when +row+ is nil/false.
      def process(row)
        if row
          @surrogate_key += 1
          row[destination] = @surrogate_key
          row
        end
      end

      private
      # Table name as resolved by the engine for the target connection
      def table_name
        ETL::Engine.table(table, ETL::Engine.connection(target))
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module ETL #:nodoc:
  module Processor #:nodoc:
    # A processor which will truncate a table. Use as a pre-processor for cleaning out a table
    # prior to loading
    class TruncateProcessor < ETL::Processor::Processor
      # Defines the table to truncate
      attr_reader :table

      # Defines the database connection to use
      attr_reader :target

      # Initialize the processor
      #
      # Options:
      # * <tt>:target</tt>: The target connection
      # * <tt>:table</tt>: The table name
      # * <tt>:options</tt>: Optional truncate options
      def initialize(control, configuration)
        super
        @target = configuration[:target] || {}
        @table = configuration[:table]
        @options = configuration[:options]
      end

      # Truncate the configured table on the target connection. On a
      # PostgreSQL connection the options default to 'RESTART IDENTITY' when
      # none were configured.
      def process
        conn = ETL::Engine.connection(target)
        @options ||= 'RESTART IDENTITY' if postgresql?(conn)
        conn.truncate(table_name, @options)
      end

      private
      # True when +conn+ is a PostgreSQL adapter. The constant is probed with
      # defined? first so that referencing it cannot raise NameError when the
      # PostgreSQL adapter has not been loaded.
      def postgresql?(conn)
        defined?(ActiveRecord::ConnectionAdapters::PostgreSQLAdapter) &&
          conn.is_a?(ActiveRecord::ConnectionAdapters::PostgreSQLAdapter)
      end

      # Table name as resolved by the engine for the target connection
      def table_name
        ETL::Engine.table(table, ETL::Engine.connection(target))
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ optional_require 'zip/zip'
2
+
3
module ETL
  module Processor
    # Custom processor to zip files
    class ZipFileProcessor < ETL::Processor::Processor
      attr_reader :infile
      attr_reader :destination

      # configuration options include:
      # * infile - File to zip (required). A relative path is resolved
      #   against the current working directory.
      # * destination - Zip file name (default: #{infile}.zip)
      def initialize(control, configuration)
        # Let the base class record control/configuration so the inherited
        # accessors and the after_initialize hook work for this processor too.
        super
        path = Pathname.new(configuration[:infile])
        # Expand the relative path exactly once. Joining the expanded path's
        # dirname with the relative path again would repeat any directory
        # part ("foo/bar.txt" -> "<cwd>/foo/foo/bar.txt").
        @infile = path.absolute? ? path : Pathname.new(File.expand_path(path.to_s))
        @destination = configuration[:destination] || "#{infile}.zip"
      end

      # Create (or open) the destination archive and add the input file to it
      # under its base name.
      def process
        Zip::ZipFile.open(@destination, Zip::ZipFile::CREATE) do |zipfile|
          zipfile.add(@infile.basename, @infile)
        end
      end

    end
  end
end
@@ -0,0 +1,20 @@
1
+ # This source file contains the ETL::Row class.
2
+
3
module ETL #:nodoc:
  # This class represents a single row currently passing through the ETL pipeline
  class Row < Hash
    # Accessor for the originating source
    attr_accessor :source

    # All change types
    CHANGE_TYPES = [:insert, :update, :delete].freeze

    # Writer for the row's change type. The reader is defined explicitly
    # below, so only the writer is generated here (an attr_accessor would be
    # overridden straight away and trigger a method-redefinition warning).
    attr_writer :change_type

    # Get the change type, defaults to :insert
    def change_type
      @change_type ||= :insert
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # This source file contains the ETL::Screen module and requires all of the
2
+ # screens
3
+
4
module ETL #:nodoc:
  # The ETL::Screen module contains pre-built screens useful for checking the
  # ETL state during execution. Screens may be fatal, which will result in
  # termination of the ETL process, errors, which will result in the
  # termination of just the current ETL control file, or warnings, which will
  # result in a warning message.
  module Screen
  end
end

# Require every screen shipped in the screen/ directory next to this file.
# The list is sorted because Dir[] returns entries in a filesystem-dependent
# order on Ruby versions before 3.0; sorting keeps the load order stable.
Dir[File.dirname(__FILE__) + "/screen/*.rb"].sort.each { |file| require(file) }
@@ -0,0 +1,20 @@
1
module ETL
  module Screen
    # Screen that compares the number of rows the engine reports as written
    # with the expected count given in the configuration under <tt>:rows</tt>.
    # The comparison runs as soon as the screen is instantiated; any
    # difference makes the screen fail by raising.
    class RowCountScreen
      attr_accessor :control, :configuration

      # Remember the control object and the configuration Hash, then run the
      # check immediately.
      def initialize(control, configuration={})
        @control, @configuration = control, configuration
        execute
      end

      # Raise unless Engine.rows_written equals the configured <tt>:rows</tt>.
      def execute
        return if Engine.rows_written == configuration[:rows]
        raise "Rows written (#{Engine.rows_written}) does not match expected rows (#{configuration[:rows]})"
      end
    end
  end
end