activewarehouse-etl 0.8.4 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +98 -62
- data/Rakefile +11 -0
- data/TODO +2 -1
- data/lib/etl.rb +9 -0
- data/lib/etl/batch.rb +2 -0
- data/lib/etl/batch/batch.rb +111 -0
- data/lib/etl/batch/directives.rb +55 -0
- data/lib/etl/builder.rb +1 -0
- data/lib/etl/builder/date_dimension_builder.rb +83 -0
- data/lib/etl/commands/etl.rb +56 -43
- data/lib/etl/control/control.rb +58 -9
- data/lib/etl/control/destination.rb +29 -4
- data/lib/etl/control/destination/database_destination.rb +17 -27
- data/lib/etl/control/source/database_source.rb +17 -40
- data/lib/etl/control/source/file_source.rb +8 -5
- data/lib/etl/control/source/model_source.rb +39 -0
- data/lib/etl/core_ext.rb +1 -0
- data/lib/etl/core_ext/time.rb +5 -0
- data/lib/etl/core_ext/time/calculations.rb +40 -0
- data/lib/etl/engine.rb +184 -83
- data/lib/etl/execution.rb +1 -0
- data/lib/etl/execution/base.rb +1 -1
- data/lib/etl/execution/batch.rb +8 -0
- data/lib/etl/execution/job.rb +1 -0
- data/lib/etl/execution/migration.rb +16 -4
- data/lib/etl/generator/surrogate_key_generator.rb +20 -4
- data/lib/etl/http_tools.rb +1 -1
- data/lib/etl/processor/bulk_import_processor.rb +16 -19
- data/lib/etl/processor/check_exist_processor.rb +16 -7
- data/lib/etl/processor/hierarchy_exploder_processor.rb +2 -1
- data/lib/etl/processor/require_non_blank_processor.rb +26 -0
- data/lib/etl/processor/surrogate_key_processor.rb +22 -2
- data/lib/etl/processor/truncate_processor.rb +13 -13
- data/lib/etl/screen.rb +14 -0
- data/lib/etl/screen/row_count_screen.rb +7 -2
- data/lib/etl/transform/foreign_key_lookup_transform.rb +15 -5
- data/lib/etl/transform/hierarchy_lookup_transform.rb +7 -14
- data/lib/etl/util.rb +59 -0
- data/lib/etl/version.rb +2 -2
- metadata +19 -2
data/lib/etl/builder.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'etl/builder/date_dimension_builder'
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module ETL #:nodoc:
  module Builder #:nodoc:
    # A builder which will build a data structure which can be used to populate a date dimension using
    # commonly used date dimension columns.
    #
    # One record (a Hash keyed by column name) is produced per day from +start_date+ through
    # +end_date+, inclusive.
    #
    # Besides core Ruby, this class calls Time methods that are not defined here: +years_ago+ and
    # +tomorrow+, plus +week+, +quarter+, +fiscal_year+, +fiscal_year_yday+, +fiscal_year_week+,
    # +fiscal_year_month+ and +fiscal_year_quarter+. They must be loaded before +build+ is called.
    class DateDimensionBuilder
      # Specify the start date for the first record
      attr_accessor :start_date

      # Specify the end date for the last record
      attr_accessor :end_date

      # Define any holiday indicators. Each entry marks one day as a holiday; an entry matches a
      # generated day when it is equal to it or when it responds to +year+, +month+ and +day+ and
      # those agree with the generated day (so Date values and Time values at any time of day work).
      attr_accessor :holiday_indicators

      # The weekday indicators. The default array begins on Sunday and goes to Saturday and is
      # indexed by Time#wday. The value is shared by the class and all of its instances.
      @@weekday_indicators = ['Weekend','Weekday','Weekday','Weekday','Weekday','Weekday','Weekend']

      # Class-level and instance-level reader/writer pairs for the shared weekday indicators.
      # Written out explicitly (the same four methods a +cattr_accessor+ declaration provides) so
      # the class body loads without that extension.
      def self.weekday_indicators
        @@weekday_indicators
      end

      def self.weekday_indicators=(indicators)
        @@weekday_indicators = indicators
      end

      def weekday_indicators
        @@weekday_indicators
      end

      def weekday_indicators=(indicators)
        @@weekday_indicators = indicators
      end

      # Initialize the builder.
      #
      # * <tt>start_date</tt>: The start date. Defaults to 5 years ago from today.
      # * <tt>end_date</tt>: The end date. Defaults to now.
      def initialize(start_date=Time.now.years_ago(5), end_date=Time.now)
        @start_date = start_date
        @end_date = end_date
        @holiday_indicators = []
      end

      # Returns an array of hashes representing records in the dimension. The values for each record are
      # accessed by name.
      #
      # * <tt>options</tt>: Accepted for interface compatibility; not currently read.
      #
      # Columns that are not populated yet: day/week/month_number_in_epoch,
      # last_day_in_week_indicator, last_day_in_month_indicator, calendar_week_ending_date,
      # calendar_half_year and fiscal_half_year.
      def build(options={})
        last_day = end_date.to_time
        records = []
        date = start_date.to_time
        while date <= last_day
          # Each of these is used for several columns, so compute it once per day.
          week           = date.week
          quarter        = date.quarter
          fiscal_year    = date.fiscal_year
          fiscal_week    = date.fiscal_year_week
          fiscal_month   = date.fiscal_year_month
          fiscal_quarter = date.fiscal_year_quarter

          record = {}
          record[:date] = date.strftime("%m/%d/%Y")
          record[:full_date_description] = date.strftime("%B %d,%Y")
          record[:day_of_week] = date.strftime("%A")
          record[:day_number_in_calendar_month] = date.day
          record[:day_number_in_calendar_year] = date.yday
          record[:day_number_in_fiscal_month] = date.day # should this be different from CY?
          record[:day_number_in_fiscal_year] = date.fiscal_year_yday
          record[:calendar_week] = "Week #{week}"
          record[:calendar_week_number_in_year] = week
          record[:calendar_month_name] = date.strftime("%B")
          record[:calendar_month_number_in_year] = date.month
          record[:calendar_year_month] = date.strftime("%Y-%m")
          record[:calendar_quarter] = "Q#{quarter}"
          record[:calendar_quarter_number_in_year] = quarter
          record[:calendar_year_quarter] = "#{date.strftime('%Y')}-#{record[:calendar_quarter]}"
          record[:calendar_year] = date.year.to_s
          record[:fiscal_week] = "FY Week #{fiscal_week}"
          record[:fiscal_week_number_in_year] = fiscal_week
          record[:fiscal_month] = fiscal_month
          record[:fiscal_month_number_in_year] = fiscal_month
          record[:fiscal_year_month] = "FY#{fiscal_year}-" + fiscal_month.to_s.rjust(2, '0')
          record[:fiscal_quarter] = "FY Q#{fiscal_quarter}"
          record[:fiscal_year_quarter] = "FY#{fiscal_year}-Q#{fiscal_quarter}"
          record[:fiscal_year_quarter_number] = fiscal_quarter
          record[:fiscal_year] = "FY#{fiscal_year}"
          record[:fiscal_year_number] = fiscal_year
          record[:holiday_indicator] = holiday?(date) ? 'Holiday' : 'Nonholiday'
          record[:weekday_indicator] = weekday_indicators[date.wday]
          record[:selling_season] = 'None'
          record[:major_event] = 'None'
          record[:sql_date_stamp] = date

          records << record
          # NOTE(review): relies on Time#tomorrow advancing by one calendar day; if it adds a
          # fixed 24 hours, days around a DST change may repeat or be skipped -- confirm.
          date = date.tomorrow
        end
        records
      end

      private

      # True when +date+ is one of the configured holidays. Exact equality alone is not enough:
      # +date+ carries a time of day, so a holiday given as a Date, or as a Time at another time
      # of day, would never be equal to it.
      def holiday?(date)
        holiday_indicators.any? do |holiday|
          holiday == date || same_calendar_day?(holiday, date)
        end
      end

      # True when +holiday+ exposes year/month/day and all three match those of +date+.
      def same_calendar_day?(holiday, date)
        return false unless [:year, :month, :day].all? { |part| holiday.respond_to?(part) }
        holiday.year == date.year && holiday.month == date.month && holiday.day == date.day
      end
    end
  end
end
|
data/lib/etl/commands/etl.rb
CHANGED
@@ -24,54 +24,67 @@
|
|
24
24
|
require 'benchmark'
|
25
25
|
require 'getoptlong'
|
26
26
|
|
27
|
-
opts = GetoptLong.new(
|
28
|
-
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
29
|
-
[ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT ],
|
30
|
-
[ '--limit', '-l', GetoptLong::REQUIRED_ARGUMENT ],
|
31
|
-
[ '--offset', '-o', GetoptLong::REQUIRED_ARGUMENT],
|
32
|
-
[ '--newlog', '-n', GetoptLong::NO_ARGUMENT ],
|
33
|
-
[ '--skip-bulk-import', '-s', GetoptLong::NO_ARGUMENT ],
|
34
|
-
[ '--read-locally', GetoptLong::NO_ARGUMENT]
|
35
|
-
)
|
36
|
-
|
37
27
|
# Print a usage statement
|
38
28
|
def usage #:nodoc:
|
39
|
-
puts "Usage: etl
|
29
|
+
puts "Usage: etl file [file file ...]" # TODO: add the command line options
|
40
30
|
end
|
41
31
|
|
42
|
-
|
43
|
-
opts
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
32
|
+
def execute
|
33
|
+
opts = GetoptLong.new(
|
34
|
+
[ '--version', '-v', GetoptLong::NO_ARGUMENT],
|
35
|
+
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
36
|
+
[ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT ],
|
37
|
+
[ '--limit', '-l', GetoptLong::REQUIRED_ARGUMENT ],
|
38
|
+
[ '--offset', '-o', GetoptLong::REQUIRED_ARGUMENT],
|
39
|
+
[ '--newlog', '-n', GetoptLong::NO_ARGUMENT ],
|
40
|
+
[ '--skip-bulk-import', '-s', GetoptLong::NO_ARGUMENT ],
|
41
|
+
[ '--read-locally', GetoptLong::NO_ARGUMENT],
|
42
|
+
[ '--rails-root', GetoptLong::REQUIRED_ARGUMENT]
|
43
|
+
)
|
44
|
+
|
45
|
+
options = {}
|
46
|
+
opts.each do |opt, arg|
|
47
|
+
case opt
|
48
|
+
when '--version'
|
49
|
+
puts "ActiveWarehouse ETL version #{ETL::VERSION::STRING}"
|
50
|
+
return
|
51
|
+
when '--help'
|
52
|
+
usage
|
53
|
+
return
|
54
|
+
when '--config'
|
55
|
+
options[:config] = arg
|
56
|
+
when '--limit'
|
57
|
+
options[:limit] = arg.to_i
|
58
|
+
when '--offset'
|
59
|
+
options[:offset] = arg.to_i
|
60
|
+
when '--newlog'
|
61
|
+
options[:newlog] = true
|
62
|
+
when '--skip-bulk-import'
|
63
|
+
puts "skip bulk import enabled"
|
64
|
+
options[:skip_bulk_import] = true
|
65
|
+
when '--read-locally'
|
66
|
+
puts "read locally enabled"
|
67
|
+
options[:read_locally] = true
|
68
|
+
when '--rails-root'
|
69
|
+
options[:rails_root] = arg
|
70
|
+
puts "rails root set to #{options[:rails_root]}"
|
71
|
+
end
|
61
72
|
end
|
62
|
-
end
|
63
73
|
|
64
|
-
if ARGV.length < 1
|
65
|
-
|
66
|
-
else
|
67
|
-
|
74
|
+
if ARGV.length < 1
|
75
|
+
usage
|
76
|
+
else
|
77
|
+
puts "Starting ETL process"
|
68
78
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
79
|
+
ETL::Engine.init(options)
|
80
|
+
ARGV.each do |f|
|
81
|
+
puts "Processing #{f}"
|
82
|
+
ETL::Engine.realtime_activity = true
|
83
|
+
ETL::Engine.process(f)
|
84
|
+
end
|
75
85
|
|
76
|
-
|
77
|
-
end
|
86
|
+
puts "ETL process complete"
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
execute
|
data/lib/etl/control/control.rb
CHANGED
@@ -2,6 +2,8 @@ module ETL #:nodoc:
|
|
2
2
|
module Control #:nodoc:
|
3
3
|
# The Context is passed to eval.
|
4
4
|
class Context
|
5
|
+
require 'test/unit/assertions'
|
6
|
+
include Test::Unit::Assertions
|
5
7
|
attr_reader :control
|
6
8
|
|
7
9
|
class << self
|
@@ -26,10 +28,11 @@ module ETL #:nodoc:
|
|
26
28
|
control.error_threshold = error_threshold
|
27
29
|
end
|
28
30
|
|
29
|
-
# Define a list of control files that this file depends on. Those control
|
30
|
-
# will be executed prior to this control file. The list may
|
31
|
-
# be converted to file names by calling
|
32
|
-
# case they will be used
|
31
|
+
# Define a list of control files that this file depends on. Those control
|
32
|
+
# files will be executed prior to this control file. The list may
|
33
|
+
# contain symbols that will be converted to file names by calling
|
34
|
+
# to_s + '.ctl', or they may be strings in which case they will be used
|
35
|
+
# as is
|
33
36
|
def depends_on(*args)
|
34
37
|
dependencies << args
|
35
38
|
end
|
@@ -53,7 +56,7 @@ module ETL #:nodoc:
|
|
53
56
|
if configuration[:type].is_a?(ETL::Control::Source)
|
54
57
|
sources << configuration[:type]
|
55
58
|
else
|
56
|
-
raise "
|
59
|
+
raise ControlError, "Type must be a Class, String, Symbol or object extending ETL::Control::Source"
|
57
60
|
end
|
58
61
|
end
|
59
62
|
else
|
@@ -61,7 +64,9 @@ module ETL #:nodoc:
|
|
61
64
|
if configuration[source_type]
|
62
65
|
source_class = ETL::Control::Source.class_for_name(source_type)
|
63
66
|
sources << source_class.new(self, configuration, definition)
|
67
|
+
break
|
64
68
|
end
|
69
|
+
raise ControlError, "A source was specified but no matching type was found"
|
65
70
|
end
|
66
71
|
end
|
67
72
|
end
|
@@ -73,10 +78,29 @@ module ETL #:nodoc:
|
|
73
78
|
|
74
79
|
# Define a destination
|
75
80
|
def destination(name, configuration={}, mapping={})
|
76
|
-
|
77
|
-
|
78
|
-
|
81
|
+
if configuration[:type]
|
82
|
+
case configuration[:type]
|
83
|
+
when Class
|
84
|
+
dest_class = configuration[:type]
|
79
85
|
destinations << dest_class.new(self, configuration, mapping)
|
86
|
+
when String, Symbol
|
87
|
+
dest_class = ETL::Control::Destination.class_for_name(configuration[:type])
|
88
|
+
destinations << dest_class.new(self, configuration, mapping)
|
89
|
+
else
|
90
|
+
if configuration[:type].is_a?(ETL::Control::Destination)
|
91
|
+
destinations << configuration[:type]
|
92
|
+
else
|
93
|
+
raise ControlError, "Type must be a Class, String, Symbol or object extending ETL::Control::Destination"
|
94
|
+
end
|
95
|
+
end
|
96
|
+
else
|
97
|
+
destination_types.each do |dest_type|
|
98
|
+
if configuration[dest_type]
|
99
|
+
dest_class = ETL::Control::Destination.class_for_name(dest_type)
|
100
|
+
destinations << dest_class.new(self, configuration, mapping)
|
101
|
+
break
|
102
|
+
end
|
103
|
+
raise ControlError, "A destination was specified but no matching destination type was found"
|
80
104
|
end
|
81
105
|
end
|
82
106
|
end
|
@@ -121,6 +145,17 @@ module ETL #:nodoc:
|
|
121
145
|
control.transforms
|
122
146
|
end
|
123
147
|
|
148
|
+
# Register the given block as a screen of the given type. The type argument
# must be one of :fatal, :error or :warn; the block is appended to the list
# kept for that type.
def screen(type, &block)
  screens[type].push(block)
end

# Get the screen blocks held by the control this context wraps
def screens
  control.screens
end
|
158
|
+
|
124
159
|
# Rename the source field to the destination field
|
125
160
|
def rename(source, destination)
|
126
161
|
after_read :rename, :source => source, :dest => destination
|
@@ -222,7 +257,6 @@ module ETL #:nodoc:
|
|
222
257
|
# Parse a control file and return a Control instance
|
223
258
|
def parse(control_file)
|
224
259
|
control_file = control_file.path if control_file.instance_of?(File)
|
225
|
-
# logger.debug "Parsing control file #{control_file.path}"
|
226
260
|
control = ETL::Control::Control.new(control_file)
|
227
261
|
# TODO: better handling of parser errors. Return the line in the control file where the error occurs.
|
228
262
|
eval(IO.readlines(control_file).join("\n"), Context.create(control), control_file)
|
@@ -230,6 +264,13 @@ module ETL #:nodoc:
|
|
230
264
|
control
|
231
265
|
end
|
232
266
|
|
267
|
+
# Parse control definitions given directly as a String and return the
# resulting Control instance. The Control is constructed with nil in place
# of a control file, the text is evaluated against a Context created for it
# (with 'inline' reported as the file name), and the Control is validated
# before being returned.
#
# The text is executed as Ruby code via eval, so only pass trusted input.
def parse_text(text)
  inline_control = ETL::Control::Control.new(nil)
  context = Context.create(inline_control)
  eval(text, context, 'inline')
  inline_control.validate
  inline_control
end
|
273
|
+
|
233
274
|
# Resolve the given object to an ETL::Control::Control instance. Acceptable arguments
|
234
275
|
# are:
|
235
276
|
# * The path to a control file as a String
|
@@ -300,6 +341,14 @@ module ETL #:nodoc:
|
|
300
341
|
@transforms ||= []
|
301
342
|
end
|
302
343
|
|
344
|
+
# Get the screen blocks, keyed by screen type (:fatal, :error and :warn).
# The Hash is created on first access with an empty Array for each type and
# the same Hash is returned on every later call.
def screens
  return @screens if @screens
  @screens = { :fatal => [], :error => [], :warn => [] }
end
|
351
|
+
|
303
352
|
# Get the error threshold. Defaults to 100.
|
304
353
|
def error_threshold
|
305
354
|
@error_threshold ||= 100
|
@@ -157,7 +157,12 @@ module ETL #:nodoc:
|
|
157
157
|
|
158
158
|
# Get the dimension table if specified
|
159
159
|
def dimension_table
|
160
|
-
configuration[:scd][:dimension_table] if scd?
|
160
|
+
ETL::Engine.table(configuration[:scd][:dimension_table], dimension_target) if scd?
|
161
|
+
end
|
162
|
+
|
163
|
+
# Get the dimension target from the :scd configuration. Returns nil when
# this destination is not configured as a slowly changing dimension.
def dimension_target
  return nil unless scd?
  configuration[:scd][:dimension_target]
end
|
162
167
|
|
163
168
|
# Process a row to determine the change type
|
@@ -209,10 +214,16 @@ module ETL #:nodoc:
|
|
209
214
|
|
210
215
|
if scd_type == 2
|
211
216
|
ETL::Engine.logger.debug "type 2 SCD"
|
217
|
+
|
218
|
+
raise ConfigurationError, "dimension_table setting required" unless dimension_table
|
219
|
+
raise ConfigurationError, "dimension_target setting required" unless dimension_target
|
220
|
+
|
221
|
+
conn = ETL::Engine.connection(dimension_target)
|
222
|
+
|
212
223
|
q = "SELECT * FROM #{dimension_table} WHERE "
|
213
224
|
q << natural_key.collect { |nk| "#{nk} = '#{row[nk]}'" }.join(" AND ")
|
214
225
|
#puts "looking for original record"
|
215
|
-
result =
|
226
|
+
result = conn.select_one(q)
|
216
227
|
if result
|
217
228
|
#puts "Result: #{result.inspect}"
|
218
229
|
original_record = ETL::Row[result.symbolize_keys!]
|
@@ -223,6 +234,15 @@ module ETL #:nodoc:
|
|
223
234
|
# need to figure out how to delete that old record before inserting the
|
224
235
|
# updated version of the record
|
225
236
|
|
237
|
+
q = "DELETE FROM #{dimension_table} WHERE "
|
238
|
+
q << natural_key.collect { |nk| "#{nk} = '#{row[nk]}'" }.join(" AND ")
|
239
|
+
|
240
|
+
num_rows_affected = conn.delete(q)
|
241
|
+
ETL::Engine.logger.debug "deleted old row"
|
242
|
+
|
243
|
+
# do this?
|
244
|
+
#raise "Should have deleted a single record" if num_rows_affected != 1
|
245
|
+
|
226
246
|
buffer << original_record
|
227
247
|
end
|
228
248
|
|
@@ -239,9 +259,14 @@ module ETL #:nodoc:
|
|
239
259
|
else
|
240
260
|
ETL::Engine.logger.debug "CRC matches, skipping"
|
241
261
|
|
262
|
+
raise ConfigurationError, "dimension_table setting required" unless dimension_table
|
263
|
+
raise ConfigurationError, "dimension_target setting required" unless dimension_target
|
264
|
+
|
265
|
+
conn = ETL::Engine.connection(dimension_target)
|
266
|
+
|
242
267
|
q = "SELECT * FROM #{dimension_table} WHERE "
|
243
268
|
q << natural_key.collect { |nk| "#{nk} = '#{row[nk]}'" }.join(" AND ")
|
244
|
-
result =
|
269
|
+
result = conn.select_one(q)
|
245
270
|
if result
|
246
271
|
# This was necessary when truncating and then loading, however I
|
247
272
|
# am getting reluctant to having the ETL process do the truncation
|
@@ -297,7 +322,7 @@ module ETL #:nodoc:
|
|
297
322
|
generator = generators[key] ||= value.new
|
298
323
|
row[key] = generator.next
|
299
324
|
when Symbol
|
300
|
-
generator = generators[key] ||= ETL::Generator::Generator.class_for_name(value).new
|
325
|
+
generator = generators[key] ||= ETL::Generator::Generator.class_for_name(value).new(options)
|
301
326
|
row[key] = generator.next
|
302
327
|
when Proc
|
303
328
|
row[key] = value.call(row)
|
@@ -5,6 +5,12 @@ module ETL #:nodoc:
|
|
5
5
|
# loader if it is supported with your target database as it will use a much faster load
|
6
6
|
# method.
|
7
7
|
class DatabaseDestination < Destination
|
8
|
+
# The target connection
|
9
|
+
attr_reader :target
|
10
|
+
|
11
|
+
# The table
|
12
|
+
attr_reader :table
|
13
|
+
|
8
14
|
# Specify the order from the source
|
9
15
|
attr_reader :order
|
10
16
|
|
@@ -19,31 +25,31 @@ module ETL #:nodoc:
|
|
19
25
|
#
|
20
26
|
# Configuration options:
|
21
27
|
# * <tt>:database</tt>: The database name (REQUIRED)
|
28
|
+
# * <tt>:target</tt>: The target connection (REQUIRED)
|
22
29
|
# * <tt>:table</tt>: The table to write to (REQUIRED)
|
23
30
|
# * <tt>:truncate</tt>: Set to true to truncate before writing (defaults to false)
|
24
31
|
# * <tt>:unique</tt>: Set to true to only insert unique records (defaults to false)
|
25
|
-
# * <tt>:adapter</tt>: The adapter to use (defaults to :mysql)
|
26
|
-
# * <tt>:username</tt>: The database username (defaults to 'root')
|
27
|
-
# * <tt>:password</tt>: The password to the database (defaults to nothing)
|
28
|
-
# * <tt>:host</tt>: The host for the database (defaults to 'localhost')
|
29
32
|
# * <tt>:append_rows</tt>: Array of rows to append
|
30
33
|
#
|
31
34
|
# Mapping options:
|
32
35
|
# * <tt>:order</tt>: The order of fields to write (REQUIRED)
|
33
36
|
def initialize(control, configuration, mapping={})
|
34
37
|
super
|
38
|
+
@target = configuration[:target]
|
39
|
+
@table = configuration[:table]
|
35
40
|
@truncate = configuration[:truncate] ||= false
|
36
41
|
@unique = configuration[:unique]
|
37
42
|
@order = mapping[:order] || order_from_source
|
38
43
|
raise ControlError, "Order required in mapping" unless @order
|
39
|
-
|
44
|
+
raise ControlError, "Table required" unless @table
|
45
|
+
raise ControlError, "Target required" unless @target
|
40
46
|
end
|
41
47
|
|
42
48
|
# Flush the currently buffered data
|
43
49
|
def flush
|
44
|
-
conn = ETL::
|
50
|
+
conn = ETL::Engine.connection(target)
|
45
51
|
conn.transaction do
|
46
|
-
conn.truncate(
|
52
|
+
conn.truncate(table_name) if truncate
|
47
53
|
|
48
54
|
buffer.flatten.each do |row|
|
49
55
|
# check to see if this row's compound key constraint already exists
|
@@ -59,7 +65,7 @@ module ETL #:nodoc:
|
|
59
65
|
names << name
|
60
66
|
values << conn.quote(row[name]) # TODO: this is probably not database agnostic
|
61
67
|
end
|
62
|
-
q = "INSERT INTO #{
|
68
|
+
q = "INSERT INTO #{table_name} (#{names.join(',')}) VALUES (#{values.join(',')})"
|
63
69
|
ETL::Engine.logger.debug("Executing insert: #{q}")
|
64
70
|
conn.insert(q, "Insert row #{current_row}")
|
65
71
|
@current_row += 1
|
@@ -72,29 +78,13 @@ module ETL #:nodoc:
|
|
72
78
|
def close
|
73
79
|
buffer << append_rows if append_rows
|
74
80
|
flush
|
75
|
-
ETL::ActiveRecord::Base.connection.disconnect!
|
76
81
|
end
|
77
82
|
|
78
83
|
private
|
79
|
-
|
80
|
-
|
81
|
-
# Required options:
|
82
|
-
# * <tt>:database</tt>: The database name
|
83
|
-
#
|
84
|
-
# Options:
|
85
|
-
# * <tt>:adapter</tt>: The adapter to use (defaults to :mysql)
|
86
|
-
# * <tt>:username</tt>: The database username (defaults to 'root')
|
87
|
-
# * <tt>:password</tt>: The password to the database (defaults to nothing)
|
88
|
-
# * <tt>:host<tt>: The host for the database (defaults to 'localhost')
|
89
|
-
def connect
|
90
|
-
ETL::ActiveRecord::Base.establish_connection(
|
91
|
-
:adapter => (configuration[:adapter] || :mysql),
|
92
|
-
:username => (configuration[:username] || 'root'),
|
93
|
-
:host => (configuration[:host] || 'localhost'),
|
94
|
-
:password => configuration[:password],
|
95
|
-
:database => configuration[:database]
|
96
|
-
)
|
84
|
+
# The table name to use in SQL statements: asks ETL::Engine for the table
# corresponding to +table+ on the connection registered for +target+.
def table_name
  connection = ETL::Engine.connection(target)
  ETL::Engine.table(table, connection)
end
|
87
|
+
|
98
88
|
end
|
99
89
|
end
|
100
90
|
end
|