activewarehouse-etl-sgonyea 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +9 -0
- data/0.9-UPGRADE +6 -0
- data/CHANGELOG +236 -0
- data/Gemfile +4 -0
- data/HOW_TO_RELEASE +13 -0
- data/LICENSE +7 -0
- data/README.textile +111 -0
- data/Rakefile +103 -0
- data/TODO +28 -0
- data/active_support_logger.patch +78 -0
- data/activewarehouse-etl.gemspec +36 -0
- data/bin/etl +28 -0
- data/bin/etl.cmd +8 -0
- data/examples/database.example.yml +16 -0
- data/lib/etl.rb +97 -0
- data/lib/etl/batch.rb +2 -0
- data/lib/etl/batch/batch.rb +111 -0
- data/lib/etl/batch/directives.rb +65 -0
- data/lib/etl/builder.rb +2 -0
- data/lib/etl/builder/date_dimension_builder.rb +96 -0
- data/lib/etl/builder/time_dimension_builder.rb +31 -0
- data/lib/etl/commands/etl.rb +89 -0
- data/lib/etl/control.rb +3 -0
- data/lib/etl/control/control.rb +405 -0
- data/lib/etl/control/destination.rb +438 -0
- data/lib/etl/control/destination/csv_destination.rb +113 -0
- data/lib/etl/control/destination/database_destination.rb +97 -0
- data/lib/etl/control/destination/excel_destination.rb +91 -0
- data/lib/etl/control/destination/file_destination.rb +126 -0
- data/lib/etl/control/destination/insert_update_database_destination.rb +136 -0
- data/lib/etl/control/destination/update_database_destination.rb +109 -0
- data/lib/etl/control/destination/yaml_destination.rb +74 -0
- data/lib/etl/control/source.rb +132 -0
- data/lib/etl/control/source/database_source.rb +224 -0
- data/lib/etl/control/source/enumerable_source.rb +11 -0
- data/lib/etl/control/source/file_source.rb +90 -0
- data/lib/etl/control/source/model_source.rb +39 -0
- data/lib/etl/core_ext.rb +1 -0
- data/lib/etl/core_ext/time.rb +5 -0
- data/lib/etl/core_ext/time/calculations.rb +42 -0
- data/lib/etl/engine.rb +582 -0
- data/lib/etl/execution.rb +19 -0
- data/lib/etl/execution/base.rb +8 -0
- data/lib/etl/execution/batch.rb +10 -0
- data/lib/etl/execution/job.rb +8 -0
- data/lib/etl/execution/migration.rb +90 -0
- data/lib/etl/generator.rb +2 -0
- data/lib/etl/generator/generator.rb +20 -0
- data/lib/etl/generator/surrogate_key_generator.rb +39 -0
- data/lib/etl/http_tools.rb +139 -0
- data/lib/etl/parser.rb +11 -0
- data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
- data/lib/etl/parser/csv_parser.rb +93 -0
- data/lib/etl/parser/excel_parser.rb +112 -0
- data/lib/etl/parser/fixed_width_parser.rb +65 -0
- data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
- data/lib/etl/parser/parser.rb +41 -0
- data/lib/etl/parser/sax_parser.rb +218 -0
- data/lib/etl/parser/xml_parser.rb +65 -0
- data/lib/etl/processor.rb +11 -0
- data/lib/etl/processor/block_processor.rb +14 -0
- data/lib/etl/processor/bulk_import_processor.rb +94 -0
- data/lib/etl/processor/check_exist_processor.rb +80 -0
- data/lib/etl/processor/check_unique_processor.rb +39 -0
- data/lib/etl/processor/copy_field_processor.rb +26 -0
- data/lib/etl/processor/database_join_processor.rb +82 -0
- data/lib/etl/processor/encode_processor.rb +55 -0
- data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
- data/lib/etl/processor/escape_csv_processor.rb +77 -0
- data/lib/etl/processor/filter_row_processor.rb +51 -0
- data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
- data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
- data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
- data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
- data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
- data/lib/etl/processor/print_row_processor.rb +12 -0
- data/lib/etl/processor/processor.rb +25 -0
- data/lib/etl/processor/rename_processor.rb +24 -0
- data/lib/etl/processor/require_non_blank_processor.rb +26 -0
- data/lib/etl/processor/row_processor.rb +27 -0
- data/lib/etl/processor/sequence_processor.rb +23 -0
- data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
- data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
- data/lib/etl/processor/surrogate_key_processor.rb +53 -0
- data/lib/etl/processor/truncate_processor.rb +40 -0
- data/lib/etl/processor/zip_file_processor.rb +27 -0
- data/lib/etl/row.rb +20 -0
- data/lib/etl/screen.rb +14 -0
- data/lib/etl/screen/row_count_screen.rb +20 -0
- data/lib/etl/transform.rb +2 -0
- data/lib/etl/transform/block_transform.rb +13 -0
- data/lib/etl/transform/calculation_transform.rb +71 -0
- data/lib/etl/transform/date_to_string_transform.rb +20 -0
- data/lib/etl/transform/decode_transform.rb +51 -0
- data/lib/etl/transform/default_transform.rb +20 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +211 -0
- data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
- data/lib/etl/transform/md5_transform.rb +13 -0
- data/lib/etl/transform/ordinalize_transform.rb +14 -0
- data/lib/etl/transform/sha1_transform.rb +13 -0
- data/lib/etl/transform/split_fields_transform.rb +27 -0
- data/lib/etl/transform/string_to_date_time_transform.rb +14 -0
- data/lib/etl/transform/string_to_date_transform.rb +16 -0
- data/lib/etl/transform/string_to_time_transform.rb +11 -0
- data/lib/etl/transform/transform.rb +61 -0
- data/lib/etl/transform/trim_transform.rb +26 -0
- data/lib/etl/transform/type_transform.rb +35 -0
- data/lib/etl/util.rb +59 -0
- data/lib/etl/version.rb +3 -0
- data/test-matrix.yml +10 -0
- data/test/.gitignore +1 -0
- data/test/.ignore +2 -0
- data/test/all.ebf +6 -0
- data/test/apache_combined_log.ctl +11 -0
- data/test/batch_test.rb +41 -0
- data/test/batch_with_error.ebf +6 -0
- data/test/batched1.ctl +0 -0
- data/test/batched2.ctl +0 -0
- data/test/block_processor.ctl +6 -0
- data/test/block_processor_error.ctl +1 -0
- data/test/block_processor_pre_post_process.ctl +4 -0
- data/test/block_processor_remove_rows.ctl +5 -0
- data/test/block_processor_test.rb +38 -0
- data/test/check_exist_processor_test.rb +92 -0
- data/test/check_unique_processor_test.rb +40 -0
- data/test/config/Gemfile.rails-2.3.x +3 -0
- data/test/config/Gemfile.rails-2.3.x.lock +53 -0
- data/test/config/Gemfile.rails-3.0.x +3 -0
- data/test/config/Gemfile.rails-3.0.x.lock +61 -0
- data/test/config/common.rb +29 -0
- data/test/connection/mysql/connection.rb +9 -0
- data/test/connection/mysql/schema.sql +37 -0
- data/test/connection/postgresql/connection.rb +13 -0
- data/test/connection/postgresql/schema.sql +40 -0
- data/test/control_test.rb +43 -0
- data/test/data/apache_combined_log.txt +3 -0
- data/test/data/bulk_import.txt +3 -0
- data/test/data/bulk_import_with_empties.txt +3 -0
- data/test/data/decode.txt +3 -0
- data/test/data/delimited.txt +3 -0
- data/test/data/encode_source_latin1.txt +2 -0
- data/test/data/excel.xls +0 -0
- data/test/data/excel2.xls +0 -0
- data/test/data/fixed_width.txt +3 -0
- data/test/data/multiple_delimited_1.txt +3 -0
- data/test/data/multiple_delimited_2.txt +3 -0
- data/test/data/nokogiri.xml +38 -0
- data/test/data/people.txt +3 -0
- data/test/data/sax.xml +14 -0
- data/test/data/xml.xml +16 -0
- data/test/database_join_processor_test.rb +43 -0
- data/test/date_dimension_builder_test.rb +96 -0
- data/test/delimited.ctl +30 -0
- data/test/delimited_absolute.ctl +31 -0
- data/test/delimited_destination_db.ctl +23 -0
- data/test/delimited_excel.ctl +31 -0
- data/test/delimited_insert_update.ctl +34 -0
- data/test/delimited_update.ctl +34 -0
- data/test/delimited_with_bulk_load.ctl +34 -0
- data/test/destination_test.rb +275 -0
- data/test/directive_test.rb +23 -0
- data/test/encode_processor_test.rb +32 -0
- data/test/engine_test.rb +78 -0
- data/test/ensure_fields_presence_processor_test.rb +28 -0
- data/test/errors.ctl +24 -0
- data/test/etl_test.rb +42 -0
- data/test/excel.ctl +24 -0
- data/test/excel2.ctl +25 -0
- data/test/fixed_width.ctl +35 -0
- data/test/foreign_key_lookup_transform_test.rb +50 -0
- data/test/generator_test.rb +14 -0
- data/test/inline_parser.ctl +17 -0
- data/test/mocks/mock_destination.rb +26 -0
- data/test/mocks/mock_source.rb +25 -0
- data/test/model_source.ctl +14 -0
- data/test/multiple_delimited.ctl +22 -0
- data/test/multiple_source_delimited.ctl +39 -0
- data/test/nokogiri_all.ctl +35 -0
- data/test/nokogiri_select.ctl +35 -0
- data/test/nokogiri_test.rb +35 -0
- data/test/parser_test.rb +224 -0
- data/test/performance/delimited.ctl +30 -0
- data/test/processor_test.rb +44 -0
- data/test/row_processor_test.rb +17 -0
- data/test/sax.ctl +26 -0
- data/test/scd/1.txt +1 -0
- data/test/scd/2.txt +1 -0
- data/test/scd/3.txt +1 -0
- data/test/scd_test.rb +257 -0
- data/test/scd_test_type_1.ctl +43 -0
- data/test/scd_test_type_2.ctl +34 -0
- data/test/screen_test.rb +9 -0
- data/test/screen_test_error.ctl +3 -0
- data/test/screen_test_fatal.ctl +3 -0
- data/test/source_test.rb +154 -0
- data/test/test_helper.rb +37 -0
- data/test/transform_test.rb +101 -0
- data/test/truncate_processor_test.rb +37 -0
- data/test/xml.ctl +31 -0
- metadata +370 -0
@@ -0,0 +1,97 @@
|
|
1
|
+
module ETL #:nodoc:
  module Control #:nodoc:
    # Destination that writes rows straight into a database table, issuing one
    # INSERT statement per buffered row. This suits small data volumes; for
    # larger loads the bulk loader (where the target database supports it) is
    # the faster choice.
    class DatabaseDestination < Destination
      # The target connection
      attr_reader :target

      # The table
      attr_reader :table

      # The ordered list of fields written for every row
      attr_reader :order

      # True when the destination table is truncated before the first write
      attr_reader :truncate

      # Build the destination.
      #
      # * <tt>control</tt>: The ETL::Control::Control instance
      # * <tt>configuration</tt>: The configuration Hash
      # * <tt>mapping</tt>: The mapping
      #
      # Configuration options:
      # * <tt>:target</tt>: The target connection (REQUIRED)
      # * <tt>:table</tt>: The table to write to (REQUIRED)
      # * <tt>:truncate</tt>: Set to true to truncate before writing (defaults to false)
      # * <tt>:unique</tt>: Fields forming the uniqueness key. NOTE(review): an
      #   Array is concatenated onto this value, so it presumably has to be an
      #   Array of field names rather than +true+ -- confirm against callers.
      # * <tt>:append_rows</tt>: Array of rows to append
      #
      # Mapping options:
      # * <tt>:order</tt>: The order of fields to write (falls back to the
      #   order derived from the source when omitted)
      #
      # Raises ControlError when no order, table or target is available.
      def initialize(control, configuration, mapping={})
        super
        @target = configuration[:target]
        @table = configuration[:table]

        # The default is stored back into the configuration hash as well.
        configuration[:truncate] ||= false
        @truncate = configuration[:truncate]

        unique_fields = configuration[:unique]
        @unique = unique_fields ? unique_fields + [scd_effective_date_field] : unique_fields
        @unique.uniq! unless @unique.nil?

        explicit_order = mapping[:order]
        @order = if explicit_order
          explicit_order + scd_required_fields
        else
          order_from_source
        end
        @order.uniq! unless @order.nil?

        raise ControlError, "Order required in mapping" unless @order
        raise ControlError, "Table required" unless @table
        raise ControlError, "Target required" unless @target
      end

      # Write every buffered row to the table inside a single transaction and
      # then empty the buffer.
      def flush
        conn.transaction do
          buffer.flatten.each do |row|
            # Skip rows rejected by the compound key check. The check runs
            # before virtual fields are added, so it cannot rely on them.
            next unless row_allowed?(row)

            add_virtuals!(row)

            columns = order.map { |field| conn.quote_column_name(field) }
            literals = order.map { |field| conn.quote(row[field]) }

            q = "INSERT INTO #{conn.quote_table_name(table_name)} (#{columns.join(',')}) VALUES (#{literals.join(',')})"
            ETL::Engine.logger.debug("Executing insert: #{q}")
            conn.insert(q, "Insert row #{current_row}")
            @current_row += 1
          end
          buffer.clear
        end
      end

      # Queue any configured append rows and flush whatever is still buffered.
      def close
        if append_rows
          buffer << append_rows
        end
        flush
      end

      private

      # The memoized target connection. The table is truncated once, when the
      # connection is first obtained, if truncation was requested.
      def conn
        return @conn if @conn

        connection = ETL::Engine.connection(target)
        connection.truncate(table_name) if truncate
        @conn = connection
      end

      # The table name as resolved by ETL::Engine.table. The connection is
      # fetched from the engine directly rather than through #conn, which
      # itself calls this method while it is still being initialized.
      def table_name
        engine_connection = ETL::Engine.connection(target)
        ETL::Engine.table(table, engine_connection)
      end

    end
  end
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
optional_require 'spreadsheet'
|
2
|
+
|
3
|
+
module ETL
  module Control
    # Excel as the final destination. Rows are collected in the destination
    # buffer and written to the first worksheet of a workbook, which is saved
    # to disk when the destination is closed.
    class ExcelDestination < Destination
      # The File to write to
      attr_reader :file

      # The output order
      attr_reader :order

      # Flag which indicates to append (default is to overwrite)
      attr_accessor :append

      # Initialize the object.
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration map
      # * <tt>mapping</tt>: The output mapping
      #
      # Configuration options:
      # * <tt>:file</tt>: The file to write to (REQUIRED). Relative paths are
      #   resolved against the directory of the control file.
      # * <tt>:append</tt>: Set to true to append to the file (default is to overwrite)
      # * <tt>:unique</tt>: Fields forming the uniqueness key. NOTE(review): an
      #   Array is concatenated onto this value, so it presumably has to be an
      #   Array of field names rather than +true+ -- confirm against callers.
      # * <tt>:append_rows</tt>: Array of rows to append
      #
      # Mapping options:
      # * <tt>:order</tt>: The order array (falls back to the order derived
      #   from the source when omitted)
      #
      # Raises ControlError when no order is available.
      def initialize(control, configuration, mapping={})
        super
        path = Pathname.new(configuration[:file])
        @file = path.absolute? ? path : Pathname.new(File.dirname(File.expand_path(control.file))) + path
        @append = configuration[:append] ||= false
        @unique = configuration[:unique] ? configuration[:unique] + scd_required_fields : configuration[:unique]
        @unique.uniq! unless @unique.nil?
        @order = mapping[:order] ? mapping[:order] + scd_required_fields : order_from_source
        @order.uniq! unless @order.nil?
        raise ControlError, "Order required in mapping" unless @order
      end

      # Close the destination. This queues any configured append rows, flushes
      # the buffer and writes the workbook to the destination file.
      def close
        buffer << append_rows if append_rows
        flush
        book.write(file)
      end

      # Flush the destination buffer into the worksheet.
      #
      # Rows are placed at the next free row of the sheet. The position is
      # tracked across calls, so that a later flush (or append mode) adds rows
      # after the ones already present instead of inserting them at the top,
      # and rows skipped by the uniqueness check leave no gaps.
      def flush
        buffer.flatten.each do |row|
          # check to see if this row's compound key constraint already exists
          # note that the compound key constraint may not utilize virtual fields
          next unless row_allowed?(row)

          # add any virtual fields
          add_virtuals!(row)

          # collect all of the values using the order designated in the configuration
          values = order.collect do |name|
            value = row[name]
            case value
            when Date, Time, DateTime
              value.to_s(:db)
            else
              value.to_s
            end
          end

          # write the values below everything written so far
          sheet.insert_row(next_row_index, values)
          @next_row_index += 1
        end
        buffer.clear
      end

      private

      # Get the workbook: the existing file when appending, otherwise a new one
      def book
        @book ||= ( append ? Spreadsheet.open(file) : Spreadsheet::Workbook.new(file) )
      end

      # Get the sheet: the first worksheet when appending, otherwise a new one
      def sheet
        @sheet ||= ( append ? book.worksheet(0) : book.create_worksheet() )
      end

      # Index at which the next row is inserted. Seeded once from the sheet's
      # dimensions (element 1 is the first unused row per the spreadsheet gem),
      # then advanced by #flush after every row written.
      def next_row_index
        @next_row_index ||= sheet.dimensions[1]
      end
    end
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# This source file contains the ETL::Control::FileDestination
|
2
|
+
|
3
|
+
module ETL #:nodoc:
  module Control #:nodoc:
    # File as the final destination. Each row is written as one delimited
    # line, with the fields in the configured order.
    class FileDestination < Destination
      # The File to write to
      attr_reader :file

      # The output order
      attr_reader :order

      # Flag which indicates to append (default is to overwrite)
      attr_accessor :append

      # The separator
      attr_accessor :separator

      # The end of line marker
      attr_accessor :eol

      # The enclosure character
      attr_accessor :enclose

      # Initialize the object.
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration map
      # * <tt>mapping</tt>: The output mapping
      #
      # Configuration options:
      # * <tt>:file</tt>: The file to write to (REQUIRED). Relative paths are
      #   resolved against the directory of the control file.
      # * <tt>:append</tt>: Set to true to append to the file (default is to overwrite)
      # * <tt>:separator</tt>: Record separator (default is a comma)
      # * <tt>:eol</tt>: End of line marker (default is \n)
      # * <tt>:enclose</tt>: Enclosure character (default is none)
      # * <tt>:unique</tt>: Fields forming the uniqueness key. NOTE(review): an
      #   Array is concatenated onto this value, so it presumably has to be an
      #   Array of field names rather than +true+ -- confirm against callers.
      # * <tt>:append_rows</tt>: Array of rows to append
      #
      # Mapping options:
      # * <tt>:order</tt>: The order array (falls back to the order derived
      #   from the source when omitted)
      #
      # Raises ControlError when no order is available.
      def initialize(control, configuration, mapping={})
        super
        path = Pathname.new(configuration[:file])
        @file = path.absolute? ? path : Pathname.new(File.dirname(File.expand_path(control.file))) + path
        @append = configuration[:append] ||= false
        @separator = configuration[:separator] ||= ','
        @eol = configuration[:eol] ||= "\n"
        @enclose = configuration[:enclose]
        @unique = configuration[:unique] ? configuration[:unique] + scd_required_fields : configuration[:unique]
        @unique.uniq! unless @unique.nil?
        @order = mapping[:order] ? mapping[:order] + scd_required_fields : order_from_source
        @order.uniq! unless @order.nil?
        raise ControlError, "Order required in mapping" unless @order
      end

      # Close the destination. This queues any configured append rows, flushes
      # the buffer and closes the underlying file stream.
      def close
        buffer << append_rows if append_rows
        flush
        f.close
      end

      # Flush the destination buffer: write one line per allowed row, flush
      # the file stream and empty the buffer.
      def flush
        buffer.flatten.each do |row|
          # check to see if this row's compound key constraint already exists
          # note that the compound key constraint may not utilize virtual fields
          next unless row_allowed?(row)

          # add any virtual fields
          add_virtuals!(row)

          # collect all of the values using the order designated in the configuration
          values = order.collect do |name|
            value = row[name]
            text = case value
            when Date, Time, DateTime
              value.to_s(:db)
            else
              value.to_s
            end
            escape_value(text)
          end

          # enclose the value if required
          values.collect! { |v| enclose_value(v) } unless enclose.nil?

          # write the values joined by the separator defined in the configuration
          f.write(values.join(separator))

          # write the end-of-line
          f.write(eol)
        end
        f.flush
        buffer.clear
      end

      private

      # Escape one field: double every backslash, put a backslash in front of
      # every separator occurrence, then drop newline and carriage return
      # characters. The separator replacement uses the block form so that a
      # separator such as "&" or a digit is emitted literally rather than being
      # read as a back-reference in a replacement string.
      def escape_value(text)
        escaped = text.gsub(/\\/, '\\\\\\\\')
        escaped = escaped.gsub(separator) { "\\#{separator}" }
        escaped.gsub(/\n|\r/, '')
      end

      # Wrap one field in the enclosure, backslash-escaping any enclosure
      # found inside it. The enclosure is matched as a literal string, so
      # characters that are special in a regular expression ("|", ".", "*",
      # "(" ...) are handled like any other character.
      def enclose_value(text)
        enclose + text.gsub(enclose) { |hit| "\\#{hit}" } + enclose
      end

      # Get the open file stream
      def f
        @f ||= open(file, mode)
      end

      # Separator, end-of-line and quoting settings as an options Hash.
      # Nothing else in this class calls it.
      def options
        @options ||= {
          :col_sep => separator,
          :row_sep => eol,
          :force_quotes => !enclose.nil?
        }
      end

      # Get the appropriate mode to open the file stream
      def mode
        append ? 'a' : 'w'
      end
    end
  end
end
|
@@ -0,0 +1,136 @@
|
|
1
|
+
module ETL #:nodoc:
  module Control #:nodoc:
    # Destination which writes directly to a database, inserting a row when no
    # row with the same primary key values exists and updating the existing
    # row otherwise. Every row costs one SELECT plus one INSERT or UPDATE, so
    # this is best suited to small amounts of data.
    class InsertUpdateDatabaseDestination < Destination
      # The target connection
      attr_reader :target

      # The table
      attr_reader :table

      # Specify the order from the source
      attr_reader :order

      # Specify the primarykey from the source
      attr_reader :primarykey

      # Set to true to truncate the destination table first
      attr_reader :truncate

      # Initialize the database destination
      #
      # * <tt>control</tt>: The ETL::Control::Control instance
      # * <tt>configuration</tt>: The configuration Hash
      # * <tt>mapping</tt>: The mapping
      #
      # Configuration options:
      # * <tt>:target</tt>: The target connection (REQUIRED)
      # * <tt>:table</tt>: The table to write to (REQUIRED)
      # * <tt>:truncate</tt>: Set to true to truncate before writing (defaults to false)
      # * <tt>:unique</tt>: Fields forming the uniqueness key. NOTE(review): an
      #   Array is concatenated onto this value, so it presumably has to be an
      #   Array of field names rather than +true+ -- confirm against callers.
      # * <tt>:append_rows</tt>: Array of rows to append
      #
      # Mapping options:
      # * <tt>:order</tt>: The order of fields to write (falls back to the
      #   order derived from the source when omitted)
      # * <tt>:primarykey</tt>: The fields used to decide between insert and update (REQUIRED)
      #
      # Raises ControlError when no primary key, order, table or target is
      # available.
      def initialize(control, configuration, mapping={})
        super
        @target = configuration[:target]
        @table = configuration[:table]
        @truncate = configuration[:truncate] ||= false
        @unique = configuration[:unique] ? configuration[:unique] + [scd_effective_date_field] : configuration[:unique]
        @unique.uniq! unless @unique.nil?
        @order = mapping[:order] ? mapping[:order] + scd_required_fields : order_from_source
        @order.uniq! unless @order.nil?
        @primarykey = mapping[:primarykey] ? mapping[:primarykey] + scd_required_fields : nil
        @primarykey.uniq! unless @primarykey.nil?
        raise ControlError, "Primarykey required in mapping" unless @primarykey
        raise ControlError, "Order required in mapping" unless @order
        raise ControlError, "Table required" unless @table
        raise ControlError, "Target required" unless @target
      end

      # Flush the currently buffered data. All statements run inside a single
      # transaction; the buffer is emptied afterwards.
      def flush
        conn.transaction do
          buffer.flatten.each do |row|
            # check to see if this row's compound key constraint already exists
            # note that the compound key constraint may not utilize virtual fields
            next unless row_allowed?(row)

            # add any virtual fields
            add_virtuals!(row)

            primarykeyfilter = primarykey.map do |name|
              "#{conn.quote_column_name(name)} = #{conn.quote(row[name])}"
            end

            q = "SELECT * FROM #{conn.quote_table_name(table_name)} WHERE #{primarykeyfilter.join(' AND ')}"
            ETL::Engine.logger.debug("Executing select: #{q}")
            res = conn.execute(q, "Select row #{current_row}")

            if result_empty?(res)
              names = order.map { |name| conn.quote_column_name(name) }
              values = order.map { |name| conn.quote(row[name]) }
              q = "INSERT INTO #{conn.quote_table_name(table_name)} (#{names.join(',')}) VALUES (#{values.join(',')})"
              ETL::Engine.logger.debug("Executing insert: #{q}")
              conn.insert(q, "Insert row #{current_row}")
            else
              updatevalues = order.map do |name|
                "#{conn.quote_column_name(name)} = #{conn.quote(row[name])}"
              end
              q = "UPDATE #{conn.quote_table_name(table_name)} SET #{updatevalues.join(',')} WHERE #{primarykeyfilter.join(' AND ')}"
              ETL::Engine.logger.debug("Executing update: #{q}")
              conn.update(q, "Update row #{current_row}")
            end
            @current_row += 1
          end
          buffer.clear
        end
      end

      # Queue any configured append rows and flush the remaining buffer
      def close
        buffer << append_rows if append_rows
        flush
      end

      private

      # True when the raw result of the existence SELECT holds no rows. How
      # the result is walked (and, for the Mysql adapter, freed) depends on the
      # adapter. Raises for any adapter other than PostgreSQL, Mysql or Mysql2.
      def result_empty?(res)
        none = true
        if adapter?('PostgreSQLAdapter') || adapter?('Mysql2Adapter')
          res.each { none = false }
        elsif adapter?('MysqlAdapter')
          res.each_hash { none = false }
          res.free
        else
          raise "Unsupported adapter #{conn.class} for this destination"
        end
        none
      end

      # True when the connection is an instance of the named ActiveRecord
      # adapter class or of one of its subclasses. The comparison is done on
      # class names so that adapters whose driver is not loaded are never
      # referenced as constants (which would raise NameError).
      def adapter?(short_name)
        full_name = "ActiveRecord::ConnectionAdapters::#{short_name}"
        conn.class.ancestors.any? { |ancestor| ancestor.name == full_name }
      end

      # The memoized target connection. The table is truncated once, when the
      # connection is first obtained, if truncation was requested. Any failure
      # is re-raised as a RuntimeError whose message keeps the original text
      # and carries the underlying error so the cause is not lost.
      def conn
        @conn ||= begin
          conn = ETL::Engine.connection(target)
          conn.truncate(table_name) if truncate
          conn
        rescue => e
          raise RuntimeError, "Problem to connect to db (#{e.class}: #{e.message})"
        end
      end

      # The table name as resolved by ETL::Engine.table. The connection is
      # fetched from the engine directly rather than through #conn, which
      # itself calls this method while it is still being initialized.
      def table_name
        ETL::Engine.table(table, ETL::Engine.connection(target))
      end

    end
  end
end
|