activewarehouse-etl-sgonyea 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data/.gitignore +9 -0
  2. data/0.9-UPGRADE +6 -0
  3. data/CHANGELOG +236 -0
  4. data/Gemfile +4 -0
  5. data/HOW_TO_RELEASE +13 -0
  6. data/LICENSE +7 -0
  7. data/README.textile +111 -0
  8. data/Rakefile +103 -0
  9. data/TODO +28 -0
  10. data/active_support_logger.patch +78 -0
  11. data/activewarehouse-etl.gemspec +36 -0
  12. data/bin/etl +28 -0
  13. data/bin/etl.cmd +8 -0
  14. data/examples/database.example.yml +16 -0
  15. data/lib/etl.rb +97 -0
  16. data/lib/etl/batch.rb +2 -0
  17. data/lib/etl/batch/batch.rb +111 -0
  18. data/lib/etl/batch/directives.rb +65 -0
  19. data/lib/etl/builder.rb +2 -0
  20. data/lib/etl/builder/date_dimension_builder.rb +96 -0
  21. data/lib/etl/builder/time_dimension_builder.rb +31 -0
  22. data/lib/etl/commands/etl.rb +89 -0
  23. data/lib/etl/control.rb +3 -0
  24. data/lib/etl/control/control.rb +405 -0
  25. data/lib/etl/control/destination.rb +438 -0
  26. data/lib/etl/control/destination/csv_destination.rb +113 -0
  27. data/lib/etl/control/destination/database_destination.rb +97 -0
  28. data/lib/etl/control/destination/excel_destination.rb +91 -0
  29. data/lib/etl/control/destination/file_destination.rb +126 -0
  30. data/lib/etl/control/destination/insert_update_database_destination.rb +136 -0
  31. data/lib/etl/control/destination/update_database_destination.rb +109 -0
  32. data/lib/etl/control/destination/yaml_destination.rb +74 -0
  33. data/lib/etl/control/source.rb +132 -0
  34. data/lib/etl/control/source/database_source.rb +224 -0
  35. data/lib/etl/control/source/enumerable_source.rb +11 -0
  36. data/lib/etl/control/source/file_source.rb +90 -0
  37. data/lib/etl/control/source/model_source.rb +39 -0
  38. data/lib/etl/core_ext.rb +1 -0
  39. data/lib/etl/core_ext/time.rb +5 -0
  40. data/lib/etl/core_ext/time/calculations.rb +42 -0
  41. data/lib/etl/engine.rb +582 -0
  42. data/lib/etl/execution.rb +19 -0
  43. data/lib/etl/execution/base.rb +8 -0
  44. data/lib/etl/execution/batch.rb +10 -0
  45. data/lib/etl/execution/job.rb +8 -0
  46. data/lib/etl/execution/migration.rb +90 -0
  47. data/lib/etl/generator.rb +2 -0
  48. data/lib/etl/generator/generator.rb +20 -0
  49. data/lib/etl/generator/surrogate_key_generator.rb +39 -0
  50. data/lib/etl/http_tools.rb +139 -0
  51. data/lib/etl/parser.rb +11 -0
  52. data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
  53. data/lib/etl/parser/csv_parser.rb +93 -0
  54. data/lib/etl/parser/excel_parser.rb +112 -0
  55. data/lib/etl/parser/fixed_width_parser.rb +65 -0
  56. data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
  57. data/lib/etl/parser/parser.rb +41 -0
  58. data/lib/etl/parser/sax_parser.rb +218 -0
  59. data/lib/etl/parser/xml_parser.rb +65 -0
  60. data/lib/etl/processor.rb +11 -0
  61. data/lib/etl/processor/block_processor.rb +14 -0
  62. data/lib/etl/processor/bulk_import_processor.rb +94 -0
  63. data/lib/etl/processor/check_exist_processor.rb +80 -0
  64. data/lib/etl/processor/check_unique_processor.rb +39 -0
  65. data/lib/etl/processor/copy_field_processor.rb +26 -0
  66. data/lib/etl/processor/database_join_processor.rb +82 -0
  67. data/lib/etl/processor/encode_processor.rb +55 -0
  68. data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
  69. data/lib/etl/processor/escape_csv_processor.rb +77 -0
  70. data/lib/etl/processor/filter_row_processor.rb +51 -0
  71. data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
  72. data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
  73. data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
  74. data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
  75. data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
  76. data/lib/etl/processor/print_row_processor.rb +12 -0
  77. data/lib/etl/processor/processor.rb +25 -0
  78. data/lib/etl/processor/rename_processor.rb +24 -0
  79. data/lib/etl/processor/require_non_blank_processor.rb +26 -0
  80. data/lib/etl/processor/row_processor.rb +27 -0
  81. data/lib/etl/processor/sequence_processor.rb +23 -0
  82. data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
  83. data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
  84. data/lib/etl/processor/surrogate_key_processor.rb +53 -0
  85. data/lib/etl/processor/truncate_processor.rb +40 -0
  86. data/lib/etl/processor/zip_file_processor.rb +27 -0
  87. data/lib/etl/row.rb +20 -0
  88. data/lib/etl/screen.rb +14 -0
  89. data/lib/etl/screen/row_count_screen.rb +20 -0
  90. data/lib/etl/transform.rb +2 -0
  91. data/lib/etl/transform/block_transform.rb +13 -0
  92. data/lib/etl/transform/calculation_transform.rb +71 -0
  93. data/lib/etl/transform/date_to_string_transform.rb +20 -0
  94. data/lib/etl/transform/decode_transform.rb +51 -0
  95. data/lib/etl/transform/default_transform.rb +20 -0
  96. data/lib/etl/transform/foreign_key_lookup_transform.rb +211 -0
  97. data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
  98. data/lib/etl/transform/md5_transform.rb +13 -0
  99. data/lib/etl/transform/ordinalize_transform.rb +14 -0
  100. data/lib/etl/transform/sha1_transform.rb +13 -0
  101. data/lib/etl/transform/split_fields_transform.rb +27 -0
  102. data/lib/etl/transform/string_to_date_time_transform.rb +14 -0
  103. data/lib/etl/transform/string_to_date_transform.rb +16 -0
  104. data/lib/etl/transform/string_to_time_transform.rb +11 -0
  105. data/lib/etl/transform/transform.rb +61 -0
  106. data/lib/etl/transform/trim_transform.rb +26 -0
  107. data/lib/etl/transform/type_transform.rb +35 -0
  108. data/lib/etl/util.rb +59 -0
  109. data/lib/etl/version.rb +3 -0
  110. data/test-matrix.yml +10 -0
  111. data/test/.gitignore +1 -0
  112. data/test/.ignore +2 -0
  113. data/test/all.ebf +6 -0
  114. data/test/apache_combined_log.ctl +11 -0
  115. data/test/batch_test.rb +41 -0
  116. data/test/batch_with_error.ebf +6 -0
  117. data/test/batched1.ctl +0 -0
  118. data/test/batched2.ctl +0 -0
  119. data/test/block_processor.ctl +6 -0
  120. data/test/block_processor_error.ctl +1 -0
  121. data/test/block_processor_pre_post_process.ctl +4 -0
  122. data/test/block_processor_remove_rows.ctl +5 -0
  123. data/test/block_processor_test.rb +38 -0
  124. data/test/check_exist_processor_test.rb +92 -0
  125. data/test/check_unique_processor_test.rb +40 -0
  126. data/test/config/Gemfile.rails-2.3.x +3 -0
  127. data/test/config/Gemfile.rails-2.3.x.lock +53 -0
  128. data/test/config/Gemfile.rails-3.0.x +3 -0
  129. data/test/config/Gemfile.rails-3.0.x.lock +61 -0
  130. data/test/config/common.rb +29 -0
  131. data/test/connection/mysql/connection.rb +9 -0
  132. data/test/connection/mysql/schema.sql +37 -0
  133. data/test/connection/postgresql/connection.rb +13 -0
  134. data/test/connection/postgresql/schema.sql +40 -0
  135. data/test/control_test.rb +43 -0
  136. data/test/data/apache_combined_log.txt +3 -0
  137. data/test/data/bulk_import.txt +3 -0
  138. data/test/data/bulk_import_with_empties.txt +3 -0
  139. data/test/data/decode.txt +3 -0
  140. data/test/data/delimited.txt +3 -0
  141. data/test/data/encode_source_latin1.txt +2 -0
  142. data/test/data/excel.xls +0 -0
  143. data/test/data/excel2.xls +0 -0
  144. data/test/data/fixed_width.txt +3 -0
  145. data/test/data/multiple_delimited_1.txt +3 -0
  146. data/test/data/multiple_delimited_2.txt +3 -0
  147. data/test/data/nokogiri.xml +38 -0
  148. data/test/data/people.txt +3 -0
  149. data/test/data/sax.xml +14 -0
  150. data/test/data/xml.xml +16 -0
  151. data/test/database_join_processor_test.rb +43 -0
  152. data/test/date_dimension_builder_test.rb +96 -0
  153. data/test/delimited.ctl +30 -0
  154. data/test/delimited_absolute.ctl +31 -0
  155. data/test/delimited_destination_db.ctl +23 -0
  156. data/test/delimited_excel.ctl +31 -0
  157. data/test/delimited_insert_update.ctl +34 -0
  158. data/test/delimited_update.ctl +34 -0
  159. data/test/delimited_with_bulk_load.ctl +34 -0
  160. data/test/destination_test.rb +275 -0
  161. data/test/directive_test.rb +23 -0
  162. data/test/encode_processor_test.rb +32 -0
  163. data/test/engine_test.rb +78 -0
  164. data/test/ensure_fields_presence_processor_test.rb +28 -0
  165. data/test/errors.ctl +24 -0
  166. data/test/etl_test.rb +42 -0
  167. data/test/excel.ctl +24 -0
  168. data/test/excel2.ctl +25 -0
  169. data/test/fixed_width.ctl +35 -0
  170. data/test/foreign_key_lookup_transform_test.rb +50 -0
  171. data/test/generator_test.rb +14 -0
  172. data/test/inline_parser.ctl +17 -0
  173. data/test/mocks/mock_destination.rb +26 -0
  174. data/test/mocks/mock_source.rb +25 -0
  175. data/test/model_source.ctl +14 -0
  176. data/test/multiple_delimited.ctl +22 -0
  177. data/test/multiple_source_delimited.ctl +39 -0
  178. data/test/nokogiri_all.ctl +35 -0
  179. data/test/nokogiri_select.ctl +35 -0
  180. data/test/nokogiri_test.rb +35 -0
  181. data/test/parser_test.rb +224 -0
  182. data/test/performance/delimited.ctl +30 -0
  183. data/test/processor_test.rb +44 -0
  184. data/test/row_processor_test.rb +17 -0
  185. data/test/sax.ctl +26 -0
  186. data/test/scd/1.txt +1 -0
  187. data/test/scd/2.txt +1 -0
  188. data/test/scd/3.txt +1 -0
  189. data/test/scd_test.rb +257 -0
  190. data/test/scd_test_type_1.ctl +43 -0
  191. data/test/scd_test_type_2.ctl +34 -0
  192. data/test/screen_test.rb +9 -0
  193. data/test/screen_test_error.ctl +3 -0
  194. data/test/screen_test_fatal.ctl +3 -0
  195. data/test/source_test.rb +154 -0
  196. data/test/test_helper.rb +37 -0
  197. data/test/transform_test.rb +101 -0
  198. data/test/truncate_processor_test.rb +37 -0
  199. data/test/xml.ctl +31 -0
  200. metadata +370 -0
@@ -0,0 +1,97 @@
1
module ETL #:nodoc:
  module Control #:nodoc:
    # Destination which writes directly to a database, issuing one INSERT
    # statement per row. This is useful when you are dealing with a small
    # amount of data. For larger amounts of data you should probably use the
    # bulk loader if it is supported with your target database as it will use
    # a much faster load method.
    class DatabaseDestination < Destination
      # The target connection
      attr_reader :target

      # The table
      attr_reader :table

      # The ordered list of field names written for each row
      attr_reader :order

      # Set to true to truncate the destination table first
      attr_reader :truncate

      # Initialize the database destination
      #
      # * <tt>control</tt>: The ETL::Control::Control instance
      # * <tt>configuration</tt>: The configuration Hash
      # * <tt>mapping</tt>: The mapping
      #
      # Configuration options:
      # * <tt>:target</tt>: The target connection (REQUIRED)
      # * <tt>:table</tt>: The table to write to (REQUIRED)
      # * <tt>:truncate</tt>: Set to true to truncate before writing (defaults to false).
      #   The default is written back into the configuration Hash.
      # * <tt>:unique</tt>: Array of field names forming the uniqueness key; the SCD
      #   effective date field is appended to it. Leave unset (or false) to disable.
      # * <tt>:append_rows</tt>: Array of rows to append
      #
      # Mapping options:
      # * <tt>:order</tt>: The order of fields to write. When omitted the order is
      #   taken from <tt>order_from_source</tt>.
      #
      # Raises ControlError when the order, table or target cannot be determined.
      def initialize(control, configuration, mapping={})
        super
        @target = configuration[:target]
        @table = configuration[:table]
        @truncate = configuration[:truncate] ||= false

        # Test for truthiness, not nil: an explicit <tt>:unique => false</tt>
        # used to reach <tt>false.uniq!</tt> and raise NoMethodError.
        unique_fields = configuration[:unique]
        @unique = unique_fields ? (unique_fields + [scd_effective_date_field]).uniq : unique_fields

        @order = mapping[:order] ? mapping[:order] + scd_required_fields : order_from_source
        @order.uniq! if @order

        raise ControlError, "Order required in mapping" unless @order
        raise ControlError, "Table required" unless @table
        raise ControlError, "Target required" unless @target
      end

      # Flush the currently buffered data. All buffered rows are inserted
      # inside a single transaction and the buffer is emptied afterwards.
      def flush
        conn.transaction do
          buffer.flatten.each do |row|
            # check to see if this row's compound key constraint already exists
            # note that the compound key constraint may not utilize virtual fields
            next unless row_allowed?(row)

            # add any virtual fields
            add_virtuals!(row)

            names = order.map { |name| conn.quote_column_name(name) }
            values = order.map { |name| conn.quote(row[name]) }
            q = "INSERT INTO #{conn.quote_table_name(table_name)} (#{names.join(',')}) VALUES (#{values.join(',')})"
            ETL::Engine.logger.debug("Executing insert: #{q}")
            conn.insert(q, "Insert row #{current_row}")
            @current_row += 1
          end
          buffer.clear
        end
      end

      # Close the destination: queue any configured append rows and flush.
      def close
        buffer << append_rows if append_rows
        flush
      end

      private

      # The memoized target connection. The destination table is truncated
      # the first time the connection is obtained when <tt>truncate</tt> is set.
      def conn
        @conn ||= begin
          connection = ETL::Engine.connection(target)
          connection.truncate(table_name) if truncate
          connection
        end
      end

      # The table name as resolved by ETL::Engine.table for the target connection.
      def table_name
        ETL::Engine.table(table, ETL::Engine.connection(target))
      end
    end
  end
end
@@ -0,0 +1,91 @@
1
+ optional_require 'spreadsheet'
2
+
3
module ETL
  module Control
    # Excel as the final destination.
    class ExcelDestination < Destination
      # The File to write to
      attr_reader :file

      # The output order
      attr_reader :order

      # Flag which indicates to append (default is to overwrite)
      attr_accessor :append

      # Initialize the object.
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration map
      # * <tt>mapping</tt>: The output mapping
      #
      # Configuration options:
      # * <tt>:file</tt>: The file to write to (REQUIRED). A relative path is
      #   resolved against the directory of the control file.
      # * <tt>:append</tt>: Set to true to append to the file (default is to overwrite)
      # * <tt>:unique</tt>: Array of field names forming the uniqueness key; the SCD
      #   required fields are appended to it. Leave unset (or false) to disable.
      # * <tt>:append_rows</tt>: Array of rows to append
      #
      # Mapping options:
      # * <tt>:order</tt>: The order array. When omitted the order is taken from
      #   <tt>order_from_source</tt>.
      #
      # Raises ControlError when the file or the order cannot be determined.
      def initialize(control, configuration, mapping={})
        super
        # Fail with a configuration error instead of the TypeError that
        # Pathname.new(nil) would raise.
        raise ControlError, "File required" unless configuration[:file]

        path = Pathname.new(configuration[:file])
        @file = path.absolute? ? path : Pathname.new(File.dirname(File.expand_path(control.file))) + path
        @append = configuration[:append] ||= false

        # Test for truthiness, not nil: an explicit <tt>:unique => false</tt>
        # used to reach <tt>false.uniq!</tt> and raise NoMethodError.
        unique_fields = configuration[:unique]
        @unique = unique_fields ? (unique_fields + scd_required_fields).uniq : unique_fields

        @order = mapping[:order] ? mapping[:order] + scd_required_fields : order_from_source
        @order.uniq! if @order

        raise ControlError, "Order required in mapping" unless @order
      end

      # Close the destination. This will flush the buffer and write the
      # workbook to the file.
      def close
        buffer << append_rows if append_rows
        flush
        book.write(file)
      end

      # Flush the destination buffer into the worksheet.
      #
      # The target row index is tracked across calls. Previously the index of
      # the row within the current buffer was used, so every flush after the
      # first re-inserted from row 0 (putting later batches in front of earlier
      # ones) and rows rejected by <tt>row_allowed?</tt> left gaps.
      def flush
        # When appending, start at the first unused row of the existing sheet
        # (Spreadsheet::Worksheet#dimensions[1]); otherwise start at the top.
        @next_row_index ||= (append ? sheet.dimensions[1] : 0)

        buffer.flatten.each do |row|
          # check to see if this row's compound key constraint already exists
          # note that the compound key constraint may not utilize virtual fields
          next unless row_allowed?(row)

          # add any virtual fields
          add_virtuals!(row)

          # collect all of the values using the order designated in the configuration
          values = order.collect do |name|
            value = row[name]
            case value
            when Date, Time, DateTime
              value.to_s(:db)
            else
              value.to_s
            end
          end

          # write the values
          sheet.insert_row(@next_row_index, values)
          @next_row_index += 1
        end
        buffer.clear
      end

      private

      # Get the workbook: the existing one when appending, a new one otherwise.
      def book
        @book ||= ( append ? Spreadsheet.open(file) : Spreadsheet::Workbook.new(file) )
      end

      # Get the sheet: the first existing worksheet when appending, a newly
      # created one otherwise.
      def sheet
        @sheet ||= ( append ? book.worksheet(0) : book.create_worksheet() )
      end
    end
  end
end
@@ -0,0 +1,126 @@
1
+ # This source file contains the ETL::Control::FileDestination
2
+
3
module ETL #:nodoc:
  module Control #:nodoc:
    # File as the final destination.
    class FileDestination < Destination
      # The File to write to
      attr_reader :file

      # The output order
      attr_reader :order

      # Flag which indicates to append (default is to overwrite)
      attr_accessor :append

      # The separator
      attr_accessor :separator

      # The end of line marker
      attr_accessor :eol

      # The enclosure character
      attr_accessor :enclose

      # Initialize the object.
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration map
      # * <tt>mapping</tt>: The output mapping
      #
      # Configuration options:
      # * <tt>:file</tt>: The file to write to (REQUIRED). A relative path is
      #   resolved against the directory of the control file.
      # * <tt>:append</tt>: Set to true to append to the file (default is to overwrite)
      # * <tt>:separator</tt>: Record separator (default is a comma)
      # * <tt>:eol</tt>: End of line marker (default is \n)
      # * <tt>:enclose</tt>: Enclosure character (default is none)
      # * <tt>:unique</tt>: Array of field names forming the uniqueness key; the SCD
      #   required fields are appended to it. Leave unset (or false) to disable.
      # * <tt>:append_rows</tt>: Array of rows to append
      #
      # Mapping options:
      # * <tt>:order</tt>: The order array. When omitted the order is taken from
      #   <tt>order_from_source</tt>.
      #
      # Raises ControlError when the file or the order cannot be determined.
      def initialize(control, configuration, mapping={})
        super
        # Fail with a configuration error instead of the TypeError that
        # Pathname.new(nil) would raise.
        raise ControlError, "File required" unless configuration[:file]

        path = Pathname.new(configuration[:file])
        @file = path.absolute? ? path : Pathname.new(File.dirname(File.expand_path(control.file))) + path
        @append = configuration[:append] ||= false
        @separator = configuration[:separator] ||= ','
        @eol = configuration[:eol] ||= "\n"
        @enclose = configuration[:enclose]

        # Test for truthiness, not nil: an explicit <tt>:unique => false</tt>
        # used to reach <tt>false.uniq!</tt> and raise NoMethodError.
        unique_fields = configuration[:unique]
        @unique = unique_fields ? (unique_fields + scd_required_fields).uniq : unique_fields

        @order = mapping[:order] ? mapping[:order] + scd_required_fields : order_from_source
        @order.uniq! if @order

        raise ControlError, "Order required in mapping" unless @order
      end

      # Close the destination. This will flush the buffer and close the
      # underlying stream. The stream is closed even when the flush fails so
      # the file handle is not leaked.
      def close
        buffer << append_rows if append_rows
        begin
          flush
        ensure
          @f.close if @f && !@f.closed?
        end
        nil
      end

      # Flush the destination buffer to the file, one line per allowed row.
      def flush
        buffer.flatten.each do |row|
          # check to see if this row's compound key constraint already exists
          # note that the compound key constraint may not utilize virtual fields
          next unless row_allowed?(row)

          # add any virtual fields
          add_virtuals!(row)

          # collect all of the values using the order designated in the configuration
          values = order.collect do |name|
            value = row[name]
            case value
            when Date, Time, DateTime
              value.to_s(:db)
            else
              value.to_s
            end
          end

          # Escaping uses the block form of gsub so that the replacement text is
          # taken literally and never parsed for backreferences.
          values.collect! do |v|
            v = v.gsub('\\') { '\\\\' }                 # double every backslash
            v = v.gsub(separator) { "\\#{separator}" }  # backslash-escape the separator
            v.gsub(/\n|\r/, '')                         # strip line breaks
          end

          # enclose the value if required
          if enclose
            # Escape the enclosure so characters such as | . * are matched literally
            enclosure = Regexp.new(Regexp.escape(enclose))
            values.collect! { |v| enclose + v.gsub(enclosure) { "\\#{enclose}" } + enclose }
          end

          # write the values joined by the separator defined in the configuration
          f.write(values.join(separator))

          # write the end-of-line
          f.write(eol)
        end
        f.flush
        buffer.clear
      end

      private

      # Get the open file stream, opening it on first use.
      def f
        @f ||= File.open(file, mode)
      end

      # Separator, end-of-line and quoting settings as an options Hash.
      # NOTE(review): not referenced anywhere in this class - presumably kept
      # for a CSV-based writer; confirm before removing.
      def options
        @options ||= {
          :col_sep => separator,
          :row_sep => eol,
          :force_quotes => !enclose.nil?
        }
      end

      # Get the appropriate mode to open the file stream
      def mode
        append ? 'a' : 'w'
      end
    end
  end
end
@@ -0,0 +1,136 @@
1
module ETL #:nodoc:
  module Control #:nodoc:
    # Destination which writes directly to a database, inserting a row when no
    # row with the same primary key values exists and updating the existing
    # row otherwise. One SELECT plus one INSERT or UPDATE is issued per row, so
    # this is useful when you are dealing with a small amount of data.
    class InsertUpdateDatabaseDestination < Destination
      # The target connection
      attr_reader :target

      # The table
      attr_reader :table

      # The ordered list of field names written for each row
      attr_reader :order

      # The field names used to decide between insert and update
      attr_reader :primarykey

      # Set to true to truncate the destination table first
      attr_reader :truncate

      # Initialize the database destination
      #
      # * <tt>control</tt>: The ETL::Control::Control instance
      # * <tt>configuration</tt>: The configuration Hash
      # * <tt>mapping</tt>: The mapping
      #
      # Configuration options:
      # * <tt>:target</tt>: The target connection (REQUIRED)
      # * <tt>:table</tt>: The table to write to (REQUIRED)
      # * <tt>:truncate</tt>: Set to true to truncate before writing (defaults to false).
      #   The default is written back into the configuration Hash.
      # * <tt>:unique</tt>: Array of field names forming the uniqueness key; the SCD
      #   effective date field is appended to it. Leave unset (or false) to disable.
      # * <tt>:append_rows</tt>: Array of rows to append
      #
      # Mapping options:
      # * <tt>:order</tt>: The order of fields to write. When omitted the order is
      #   taken from <tt>order_from_source</tt>.
      # * <tt>:primarykey</tt>: Array of field names used to select insert or
      #   update (REQUIRED); the SCD required fields are appended to it.
      #
      # Raises ControlError when the primary key, order, table or target
      # cannot be determined.
      def initialize(control, configuration, mapping={})
        super
        @target = configuration[:target]
        @table = configuration[:table]
        @truncate = configuration[:truncate] ||= false

        # Test for truthiness, not nil: an explicit <tt>:unique => false</tt>
        # used to reach <tt>false.uniq!</tt> and raise NoMethodError.
        unique_fields = configuration[:unique]
        @unique = unique_fields ? (unique_fields + [scd_effective_date_field]).uniq : unique_fields

        @order = mapping[:order] ? mapping[:order] + scd_required_fields : order_from_source
        @order.uniq! if @order

        @primarykey = mapping[:primarykey] ? (mapping[:primarykey] + scd_required_fields).uniq : nil

        # An empty key list would generate a WHERE clause with no conditions.
        raise ControlError, "Primarykey required in mapping" if @primarykey.nil? || @primarykey.empty?
        raise ControlError, "Order required in mapping" unless @order
        raise ControlError, "Table required" unless @table
        raise ControlError, "Target required" unless @target
      end

      # Flush the currently buffered data. Every allowed row is looked up by
      # its primary key fields and then inserted or updated; all statements run
      # inside a single transaction and the buffer is emptied afterwards.
      def flush
        conn.transaction do
          buffer.flatten.each do |row|
            # check to see if this row's compound key constraint already exists
            # note that the compound key constraint may not utilize virtual fields
            next unless row_allowed?(row)

            # add any virtual fields
            add_virtuals!(row)

            quoted_table = conn.quote_table_name(table_name)

            # NOTE(review): a nil key value is rendered as "column = NULL", which
            # never matches in SQL - confirm key fields are always populated.
            key_filter = primarykey.map do |name|
              "#{conn.quote_column_name(name)} = #{conn.quote(row[name])}"
            end.join(' AND ')

            q = "SELECT * FROM #{quoted_table} WHERE #{key_filter}"
            ETL::Engine.logger.debug("Executing select: #{q}")
            # select_one is adapter independent: it returns the first matching
            # row or nil, so no adapter class has to be named (or loaded) here
            # and no driver specific result object has to be freed.
            existing = conn.select_one(q, "Select row #{current_row}")

            if existing.nil?
              names = order.map { |name| conn.quote_column_name(name) }
              values = order.map { |name| conn.quote(row[name]) }
              q = "INSERT INTO #{quoted_table} (#{names.join(',')}) VALUES (#{values.join(',')})"
              ETL::Engine.logger.debug("Executing insert: #{q}")
              conn.insert(q, "Insert row #{current_row}")
            else
              assignments = order.map do |name|
                "#{conn.quote_column_name(name)} = #{conn.quote(row[name])}"
              end
              q = "UPDATE #{quoted_table} SET #{assignments.join(',')} WHERE #{key_filter}"
              ETL::Engine.logger.debug("Executing update: #{q}")
              conn.update(q, "Update row #{current_row}")
            end
            @current_row += 1
          end
          buffer.clear
        end
      end

      # Close the destination: queue any configured append rows and flush.
      def close
        buffer << append_rows if append_rows
        flush
      end

      private

      # The memoized target connection. The destination table is truncated
      # the first time the connection is obtained when <tt>truncate</tt> is set.
      #
      # Any failure while connecting or truncating is re-raised as a
      # RuntimeError; the original message is kept so the cause is not lost.
      def conn
        @conn ||= begin
          connection = ETL::Engine.connection(target)
          connection.truncate(table_name) if truncate
          connection
        rescue => e
          raise RuntimeError, "Problem to connect to db: #{e.message}"
        end
      end

      # The table name as resolved by ETL::Engine.table for the target connection.
      def table_name
        ETL::Engine.table(table, ETL::Engine.connection(target))
      end
    end
  end
end