factorylabs-activewarehouse-etl 0.9.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +198 -0
- data/LICENSE +7 -0
- data/README +85 -0
- data/Rakefile +153 -0
- data/TODO +28 -0
- data/bin/etl +28 -0
- data/bin/etl.cmd +8 -0
- data/examples/database.example.yml +16 -0
- data/lib/etl.rb +78 -0
- data/lib/etl/batch.rb +2 -0
- data/lib/etl/batch/batch.rb +111 -0
- data/lib/etl/batch/directives.rb +55 -0
- data/lib/etl/builder.rb +2 -0
- data/lib/etl/builder/date_dimension_builder.rb +96 -0
- data/lib/etl/builder/time_dimension_builder.rb +31 -0
- data/lib/etl/commands/etl.rb +89 -0
- data/lib/etl/control.rb +3 -0
- data/lib/etl/control/control.rb +405 -0
- data/lib/etl/control/destination.rb +420 -0
- data/lib/etl/control/destination/database_destination.rb +95 -0
- data/lib/etl/control/destination/file_destination.rb +124 -0
- data/lib/etl/control/source.rb +109 -0
- data/lib/etl/control/source/database_source.rb +220 -0
- data/lib/etl/control/source/enumerable_source.rb +11 -0
- data/lib/etl/control/source/file_source.rb +90 -0
- data/lib/etl/control/source/model_source.rb +39 -0
- data/lib/etl/core_ext.rb +1 -0
- data/lib/etl/core_ext/time.rb +5 -0
- data/lib/etl/core_ext/time/calculations.rb +42 -0
- data/lib/etl/engine.rb +556 -0
- data/lib/etl/execution.rb +20 -0
- data/lib/etl/execution/base.rb +9 -0
- data/lib/etl/execution/batch.rb +8 -0
- data/lib/etl/execution/job.rb +8 -0
- data/lib/etl/execution/migration.rb +85 -0
- data/lib/etl/execution/record.rb +18 -0
- data/lib/etl/generator.rb +2 -0
- data/lib/etl/generator/generator.rb +20 -0
- data/lib/etl/generator/surrogate_key_generator.rb +39 -0
- data/lib/etl/http_tools.rb +139 -0
- data/lib/etl/parser.rb +11 -0
- data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
- data/lib/etl/parser/delimited_parser.rb +74 -0
- data/lib/etl/parser/fixed_width_parser.rb +65 -0
- data/lib/etl/parser/parser.rb +41 -0
- data/lib/etl/parser/sax_parser.rb +218 -0
- data/lib/etl/parser/xml_parser.rb +65 -0
- data/lib/etl/processor.rb +11 -0
- data/lib/etl/processor/block_processor.rb +14 -0
- data/lib/etl/processor/bulk_import_processor.rb +81 -0
- data/lib/etl/processor/check_exist_processor.rb +80 -0
- data/lib/etl/processor/check_unique_processor.rb +35 -0
- data/lib/etl/processor/copy_field_processor.rb +26 -0
- data/lib/etl/processor/encode_processor.rb +55 -0
- data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
- data/lib/etl/processor/print_row_processor.rb +12 -0
- data/lib/etl/processor/processor.rb +25 -0
- data/lib/etl/processor/rename_processor.rb +24 -0
- data/lib/etl/processor/require_non_blank_processor.rb +26 -0
- data/lib/etl/processor/row_processor.rb +17 -0
- data/lib/etl/processor/sequence_processor.rb +23 -0
- data/lib/etl/processor/surrogate_key_processor.rb +53 -0
- data/lib/etl/processor/truncate_processor.rb +35 -0
- data/lib/etl/row.rb +20 -0
- data/lib/etl/screen.rb +14 -0
- data/lib/etl/screen/row_count_screen.rb +20 -0
- data/lib/etl/transform.rb +2 -0
- data/lib/etl/transform/block_transform.rb +13 -0
- data/lib/etl/transform/date_to_string_transform.rb +20 -0
- data/lib/etl/transform/decode_transform.rb +51 -0
- data/lib/etl/transform/default_transform.rb +20 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +151 -0
- data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
- data/lib/etl/transform/ordinalize_transform.rb +12 -0
- data/lib/etl/transform/sha1_transform.rb +13 -0
- data/lib/etl/transform/string_to_date_transform.rb +16 -0
- data/lib/etl/transform/string_to_datetime_transform.rb +14 -0
- data/lib/etl/transform/string_to_time_transform.rb +11 -0
- data/lib/etl/transform/transform.rb +61 -0
- data/lib/etl/transform/trim_transform.rb +26 -0
- data/lib/etl/transform/type_transform.rb +35 -0
- data/lib/etl/util.rb +59 -0
- data/lib/etl/version.rb +9 -0
- metadata +195 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
module ETL #:nodoc:
|
2
|
+
module Parser #:nodoc:
|
3
|
+
# Parser for fixed-width files. Every line of input becomes one row Hash
# whose keys are the field names from the source definition and whose values
# are the stripped text found in each field's column range.
class FixedWidthParser < ETL::Parser::Parser
  # Initialize the parser
  # * <tt>source</tt>: The source object
  # * <tt>options</tt>: Parser options Hash
  def initialize(source, options={})
    super
    configure
  end

  # Yield each row of every file matched by the configured file glob.
  #
  # The first <tt>source.skip_lines</tt> lines of each file are skipped
  # before any row is yielded. A field whose start column lies beyond the
  # end of a (short) line is reported as +nil+ rather than raising.
  def each
    Dir.glob(file).each do |path|
      # The counter is per file, so every matched file has its own leading
      # lines skipped.
      lines_skipped = 0
      # File.open with a block closes the handle even if the consumer raises,
      # and (unlike Kernel#open) never interprets the name as a command.
      File.open(path) do |io|
        io.each_line do |line|
          if lines_skipped < source.skip_lines
            lines_skipped += 1
            next
          end

          row = {}
          fields.each do |name, f|
            # TODO make strip optional?
            value = line[f.field_start, f.field_length]
            row[name] = value && value.strip
          end
          yield row
        end
      end
    end
  end

  # Return a map of defined fields (field key => FixedWidthField)
  def fields
    @fields ||= {}
  end

  private

  # Build one FixedWidthField per entry of the source definition, reading
  # the <tt>:name</tt>, <tt>:start</tt>, <tt>:end</tt> and <tt>:length</tt>
  # keys of each entry.
  def configure
    source.definition.each do |field, field_options|
      fields[field] = FixedWidthField.new(
        field_options[:name], field_options[:start], field_options[:end], field_options[:length]
      )
    end
  end
end
|
46
|
+
|
47
|
+
# Describes the column range of a single field in a fixed-width line.
#
# +field_start+ is stored zero-based so it can be used directly as a String
# index; +field_end+ minus +field_start+ always equals +field_length+.
class FixedWidthField #:nodoc:
  attr_reader :name, :field_start, :field_end, :field_length

  # Initialize the field.
  # * <tt>name</tt>: The field name
  # * <tt>field_start</tt>: One-based column at which the field starts
  # * <tt>field_end</tt>: One-based column of the last character of the
  #   field (takes precedence over +field_length+ when both are given)
  # * <tt>field_length</tt>: Number of characters in the field
  #
  # Raises DefinitionError when +field_start+ is missing or when neither
  # +field_end+ nor +field_length+ is given.
  def initialize(name, field_start, field_end=nil, field_length=nil)
    # Fail with a definition error instead of a NoMethodError on nil below.
    raise DefinitionError, "field_start is required" if field_start.nil?

    @name = name
    @field_start = field_start - 1
    if field_end
      @field_end = field_end
      @field_length = @field_end - @field_start
    elsif field_length
      @field_length = field_length
      @field_end = @field_start + @field_length
    else
      raise DefinitionError, "Either field_end or field_length required"
    end
  end
end
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module ETL #:nodoc:
|
2
|
+
module Parser #:nodoc:
|
3
|
+
# Abstract parser. Concrete parsers subclass this and supply +each+, which
# is expected to hand every row of the source data to the caller as a Hash;
# Enumerable is mixed in on top of that +each+.
class Parser
  include Enumerable

  # Look up the parser class for a name given as a String or Symbol.
  #
  # Example:
  #   <tt>class_for_name(:fixed_width)</tt> returns a FixedWidthParser class
  def self.class_for_name(name)
    class_name = "#{name.to_s.camelize}Parser"
    ETL::Parser.const_get(class_name)
  end

  # source:  the Source object the data is read from
  # options: the Hash of parser options
  attr_reader :source, :options

  # Store the source and the options; a nil (or false) +options+ is
  # replaced by an empty Hash.
  def initialize(source, options={})
    @source = source
    @options = options ? options : {}
  end

  protected

  # Pathname of the configured source file. An absolute path is returned
  # unchanged; anything else is resolved against the directory that holds
  # the control file.
  def file
    configured = Pathname.new(source.configuration[:file])
    return configured if configured.absolute?

    control_dir = Pathname.new(File.dirname(source.control.file))
    control_dir + configured
  end

  # Raise +error+ with +message+ followed by the line and file it refers to.
  def raise_with_info(error, message, file, line)
    raise(error, "#{message} (line #{line} in #{file})")
  end
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,218 @@
|
|
1
|
+
require 'rexml/parsers/sax2parser'
|
2
|
+
require 'rexml/sax2listener'
|
3
|
+
|
4
|
+
module ETL #:nodoc:
|
5
|
+
module Parser #:nodoc:
|
6
|
+
# ETL parser implementation which uses SAX to parse XML files.
class SaxParser < ETL::Parser::Parser

  # The write trigger causes whatever values are currently specified for the row to be returned.
  # After returning the values will not be cleared, thus allowing for values which are assigned
  # higher in the XML tree to remain in memory.
  attr_accessor :write_trigger

  # Initialize the parser
  # * <tt>source</tt>: The Source object
  # * <tt>options</tt>: Parser options Hash
  def initialize(source, options={})
    super
    configure
  end

  # Parse every file matched by the configured file glob and pass each row
  # to the given block. A fresh Listener is attached for every file.
  def each(&block)
    Dir.glob(file).each do |path|
      # The block form closes the file once parsing finishes or fails.
      File.open(path) do |io|
        parser = REXML::Parsers::SAX2Parser.new(io)
        listener = Listener.new(self, &block)
        parser.listen(listener)
        parser.parse
      end
    end
  end

  # Get an array of Field objects
  def fields
    @fields ||= []
  end

  private

  # Read the write trigger and the field-name-to-path mapping from the
  # source definition; every path is parsed into an XPath::Path up front.
  def configure
    self.write_trigger = source.definition[:write_trigger]
    # map paths to field names
    source.definition[:fields].each do |name, path|
      fields << Field.new(name, XPath::Path.parse(path))
    end
  end

  # Class representing a field to be loaded from the source
  class Field
    # The name of the field
    attr_reader :name
    # The XPath-like path to the field in the XML document
    attr_reader :path

    def initialize(name, path) #:nodoc:
      @name = name
      @path = path
    end
  end
end
|
61
|
+
|
62
|
+
# SAX listener that tracks the current element path, collects field values
# into a row Hash and hands a copy of that row to the block whenever the
# parser's write trigger path is closed.
class Listener #:nodoc:
  include REXML::SAX2Listener

  # * <tt>parser</tt>: object providing +fields+ and +write_trigger+
  # * <tt>block</tt>: called with a copy of the current row on every write trigger
  #
  # Raises ArgumentError when no block is given.
  def initialize(parser, &block)
    raise ArgumentError, "a block is required to receive rows" unless block

    @parser = parser
    @row = {}
    @value = nil
    @proc = block
  end

  # Append CDATA content to the value of the current element. The buffer is
  # created on demand because CDATA may be the first content of an element.
  def cdata(text)
    (@value ||= ''.dup) << text
  end

  # Append character data to the value of the current element. The text is
  # stripped first and whitespace-only text is ignored.
  def characters(text)
    text = text.strip
    return if text.empty?

    (@value ||= ''.dup) << text
  end

  # Start every document with an empty path.
  def start_document
    @path = XPath::Path.new
  end

  def end_document
  end

  # Push the element onto the current path and capture the value of every
  # attribute field whose path matches the current path.
  def start_element(uri, localname, qname, attributes)
    element = XPath::Element.new(localname, attributes)
    @path.elements << element

    @parser.fields.each do |field|
      next unless @path.match?(field.path)
      next unless field.path.is_attribute?

      @row[field.name] = element.attributes[field.path.attribute]
    end
  end

  # Store the collected text for every non-attribute field matching the
  # current path, emit the row if the write trigger matches, then reset the
  # value buffer and pop the element off the path.
  def end_element(uri, localname, qname)
    @parser.fields.each do |field|
      next unless @path.match?(field.path)
      next if field.path.is_attribute?

      @row[field.name] = @value
    end

    # The row is cloned so later assignments do not alter rows already emitted.
    @proc.call(@row.clone) if @path.match?(@parser.write_trigger)

    @value = nil
    @path.elements.pop
  end

  # Remember the last position reported by the parser.
  def progress(position)
    @position = position
  end
end
|
128
|
+
|
129
|
+
# Module which contains classes that are used for XPath-like filtering
# on the SAX parser
module XPath #:nodoc:
  # An ordered list of Element objects separated by "/" in string form.
  class Path #:nodoc:
    # Splits "name[predicates]" into the element name (group 1) and the
    # comma separated predicate list (group 2).
    PREDICATE = /(.*)\[(.*)\]/

    # Get the elements in the path
    attr_accessor :elements

    # Initialize
    def initialize
      @elements = []
    end

    # Convert to a string representation
    def to_s
      @elements.map { |e| e.to_s }.join("/")
    end

    # Returns true if the last part of the path refers to an attribute.
    # An empty path never refers to an attribute.
    def is_attribute?
      last = elements.last
      !last.nil? && last.attributes.length > 0
    end

    # Return the name of the attribute referenced by the last element in this path. Returns nil if the last element
    # does not reference an attribute.
    #
    # Warning: the path should only reference a single attribute; when there are several, the first key of the
    # attributes Hash is returned.
    def attribute
      return nil unless is_attribute?
      elements.last.attributes.keys.first
    end

    # Return true if this XPath::Path matches the given path (a String or a Path). Both paths must have the same
    # number of elements, the element names must be equal, and every attribute pattern of the given path must match
    # the corresponding attribute value of this path.
    def match?(s)
      path = Path.parse(s)
      return false unless path.elements.length == elements.length

      elements.zip(path.elements).all? do |element, path_element|
        element.name == path_element.name &&
          path_element.attributes.all? { |key, pattern| element.attributes[key] =~ pattern }
      end
    end

    # Parse the string into an XPath::Path object. A Path argument is returned as is.
    #
    # Each "/"-separated part may carry a bracketed, comma separated list of <tt>key=regex</tt> pairs; a key without
    # a value matches any value (<tt>.*</tt>).
    def self.parse(s)
      return s if s.is_a?(Path)

      path = Path.new
      s.split('/').each do |part|
        attributes = {}
        match = PREDICATE.match(part)
        if match
          match[2].split(",").each do |pair|
            # Limit the split so a value may itself contain "=".
            key, value = pair.split("=", 2)
            value = ".*" if value.nil? || value.empty?
            attributes[key] = Regexp.new(value)
          end
          # Drop the bracketed predicate, keeping any surrounding text.
          part = "#{match.pre_match}#{match[1]}#{match.post_match}"
        end
        path.elements << Element.new(part, attributes)
      end
      path
    end
  end

  # A single path step: an element name plus a Hash of attributes.
  class Element #:nodoc:
    attr_reader :name
    attr_reader :attributes

    def initialize(name, attributes={})
      @name = name
      @attributes = attributes
    end

    # Render as <tt>name</tt> or <tt>name[key=value,...]</tt>; Regexp values are shown by their source.
    def to_s
      s = "#{name}"
      unless @attributes.empty?
        attr_str = @attributes.map do |key, value|
          value = value.source if value.is_a?(Regexp)
          "#{key}=#{value}"
        end.join(",")
        s << "[" + attr_str + "]"
      end
      s
    end
  end
end
|
217
|
+
end
|
218
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
|
3
|
+
module ETL
|
4
|
+
module Parser
|
5
|
+
# Parser which loads each XML file into a REXML document and yields one row
# per element matched by the collection XPath of the source definition.
class XmlParser < ETL::Parser::Parser
  # Initialize the parser
  # * <tt>source</tt>: The Source object
  # * <tt>options</tt>: Parser options Hash
  def initialize(source, options={})
    super
    configure
  end

  # Yield each row of every file matched by the configured file glob. A row
  # maps every field name to the text found at that field's XPath relative
  # to the collection element. The time taken to parse each file is logged.
  def each
    Dir.glob(file).each do |path|
      doc = nil
      t = Benchmark.realtime do
        # The block form closes the file as soon as the document is built.
        doc = File.open(path) { |io| REXML::Document.new(io) }
      end
      Engine.logger.info "XML #{path} parsed in #{t}s"
      doc.elements.each(@collection_xpath) do |element|
        row = {}
        fields.each do |f|
          row[f.name] = element.text(f.xpath)
        end
        yield row
      end
    end
  end

  # Get an array of defined fields
  def fields
    @fields ||= []
  end

  private

  # Read the collection XPath and the field list from the source definition.
  # A field is either a Symbol (used as both name and XPath) or a Hash with
  # <tt>:name</tt> and an optional <tt>:xpath</tt> that defaults to the name.
  #
  # Raises when the collection XPath is missing, and DefinitionError when a
  # field is neither a Symbol nor a Hash.
  def configure
    @collection_xpath = source.definition[:collection]
    raise "Collection XPath is required" if @collection_xpath.nil?

    source.definition[:fields].each do |field_definition|
      case field_definition
      when Symbol
        fields << Field.new(field_definition, field_definition.to_s)
      when Hash
        # Resolve the default locally so the caller's Hash is left untouched.
        xpath = field_definition[:xpath] || field_definition[:name]
        fields << Field.new(field_definition[:name], xpath.to_s)
      else
        raise DefinitionError, "Each field definition must either be an symbol or a hash of options for the field"
      end
    end
  end

  # A named field together with the XPath used to read its text.
  class Field
    attr_reader :name, :xpath

    def initialize(name, xpath)
      @name = name
      @xpath = xpath
    end
  end
end
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# This source file declares the ETL::Processor module and requires all of
# the processors.

module ETL #:nodoc:
  # The ETL::Processor module contains row-level and bulk processors
  module Processor; end
end

# The two base files are required explicitly before the directory sweep.
require 'etl/processor/processor'
require 'etl/processor/row_processor'

# Require every Ruby file found in the processor directory next to this file.
processor_glob = File.dirname(__FILE__) + "/processor/*.rb"
Dir.glob(processor_glob).each do |path|
  require(path)
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module ETL
|
2
|
+
module Processor
|
3
|
+
# Processor that hands its work to the block stored in its configuration.
# It is valid either as a RowProcessor (called on each row with after_read)
# or as a Processor (called once on pre_process or post_process).
class BlockProcessor < ETL::Processor::RowProcessor
  # * <tt>control</tt>: forwarded to the superclass
  # * <tt>configuration</tt>: Hash; the value under <tt>:block</tt> is the
  #   callable that +process+ invokes
  def initialize(control, configuration)
    super(control, configuration)
    @block = configuration[:block]
  end

  # Call the configured block with +row+ and return whatever it returns.
  # +row+ is nil when the caller passes no argument.
  def process(row=nil)
    @block.call(row)
  end
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module ETL #:nodoc:
|
2
|
+
module Processor #:nodoc:
|
3
|
+
# Processor which is used to bulk import data into a target database. The
# connection obtained from ETL::Engine for the target must support the
# +transaction+, +truncate+ and +bulk_load+ methods.
class BulkImportProcessor < ETL::Processor::Processor

  # The file to load from
  attr_reader :file
  # The target database
  attr_reader :target
  # The table name
  attr_reader :table
  # Set to true to truncate
  attr_reader :truncate
  # Array of symbols representing the column load order
  attr_reader :columns
  # The field separator (defaults to a comma)
  attr_accessor :field_separator
  # The field enclosure (defaults to nil)
  attr_accessor :field_enclosure
  # The line separator (defaults to a newline)
  attr_accessor :line_separator
  # The string that indicates a NULL (defaults to an empty string)
  attr_accessor :null_string

  # Initialize the processor.
  #
  # Configuration options:
  # * <tt>:file</tt>: The file to load data from, joined onto the directory
  #   of the control file (required)
  # * <tt>:target</tt>: The target database (required)
  # * <tt>:table</tt>: The table name (required)
  # * <tt>:truncate</tt>: Set to true to truncate before loading
  # * <tt>:columns</tt>: The columns to load in the order they appear in
  #   the bulk data file
  # * <tt>:field_separator</tt>: The field separator. Defaults to a comma
  # * <tt>:line_separator</tt>: The line separator. Defaults to a newline
  # * <tt>:null_string</tt>: The string that indicates a NULL. Defaults to
  #   an empty string
  # * <tt>:field_enclosure</tt>: The field enclosure characters
  #
  # Raises ControlError when the file, target or table is missing.
  def initialize(control, configuration)
    super
    # Checked before File.join, which would otherwise fail with a TypeError.
    raise ControlError, "File must be specified" unless configuration[:file]

    @file = File.join(File.dirname(control.file), configuration[:file])
    @target = configuration[:target]
    @table = configuration[:table]
    @truncate = configuration[:truncate] ||= false
    @columns = configuration[:columns]
    @field_separator = (configuration[:field_separator] || ',')
    @line_separator = (configuration[:line_separator] || "\n")
    @null_string = (configuration[:null_string] || "")
    @field_enclosure = configuration[:field_enclosure]

    raise ControlError, "Target must be specified" unless @target
    raise ControlError, "Table must be specified" unless @table
  end

  # Execute the processor: inside one transaction on the target connection,
  # optionally truncate the table and then bulk load the file into it.
  #
  # Does nothing when ETL::Engine.skip_bulk_import is set or when the file
  # is empty.
  def process
    return if ETL::Engine.skip_bulk_import
    return if File.size(file) == 0

    conn = ETL::Engine.connection(target)
    conn.transaction do
      conn.truncate(table_name) if truncate

      options = { :columns => columns }
      # Only pass :fields when at least one format setting is present.
      if field_separator || field_enclosure || line_separator || null_string
        format = {}
        format[:null_string] = null_string if null_string
        format[:delimited_by] = field_separator if field_separator
        format[:enclosed_by] = field_enclosure if field_enclosure
        format[:terminated_by] = line_separator if line_separator
        options[:fields] = format
      end
      conn.bulk_load(file, table_name, options)
    end
  end

  # The table name as resolved by ETL::Engine.table for the target connection.
  def table_name
    ETL::Engine.table(table, ETL::Engine.connection(target))
  end
end
|
80
|
+
end
|
81
|
+
end
|