activewarehouse-etl 0.8.4 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +98 -62
- data/Rakefile +11 -0
- data/TODO +2 -1
- data/lib/etl.rb +9 -0
- data/lib/etl/batch.rb +2 -0
- data/lib/etl/batch/batch.rb +111 -0
- data/lib/etl/batch/directives.rb +55 -0
- data/lib/etl/builder.rb +1 -0
- data/lib/etl/builder/date_dimension_builder.rb +83 -0
- data/lib/etl/commands/etl.rb +56 -43
- data/lib/etl/control/control.rb +58 -9
- data/lib/etl/control/destination.rb +29 -4
- data/lib/etl/control/destination/database_destination.rb +17 -27
- data/lib/etl/control/source/database_source.rb +17 -40
- data/lib/etl/control/source/file_source.rb +8 -5
- data/lib/etl/control/source/model_source.rb +39 -0
- data/lib/etl/core_ext.rb +1 -0
- data/lib/etl/core_ext/time.rb +5 -0
- data/lib/etl/core_ext/time/calculations.rb +40 -0
- data/lib/etl/engine.rb +184 -83
- data/lib/etl/execution.rb +1 -0
- data/lib/etl/execution/base.rb +1 -1
- data/lib/etl/execution/batch.rb +8 -0
- data/lib/etl/execution/job.rb +1 -0
- data/lib/etl/execution/migration.rb +16 -4
- data/lib/etl/generator/surrogate_key_generator.rb +20 -4
- data/lib/etl/http_tools.rb +1 -1
- data/lib/etl/processor/bulk_import_processor.rb +16 -19
- data/lib/etl/processor/check_exist_processor.rb +16 -7
- data/lib/etl/processor/hierarchy_exploder_processor.rb +2 -1
- data/lib/etl/processor/require_non_blank_processor.rb +26 -0
- data/lib/etl/processor/surrogate_key_processor.rb +22 -2
- data/lib/etl/processor/truncate_processor.rb +13 -13
- data/lib/etl/screen.rb +14 -0
- data/lib/etl/screen/row_count_screen.rb +7 -2
- data/lib/etl/transform/foreign_key_lookup_transform.rb +15 -5
- data/lib/etl/transform/hierarchy_lookup_transform.rb +7 -14
- data/lib/etl/util.rb +59 -0
- data/lib/etl/version.rb +2 -2
- metadata +19 -2
data/lib/etl/builder.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'etl/builder/date_dimension_builder'
|
data/lib/etl/builder/date_dimension_builder.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
module ETL #:nodoc:
|
2
|
+
module Builder #:nodoc:
|
3
|
+
# A builder which will build a data structure which can be used to populate a date dimension using
|
4
|
+
# commonly used date dimension columns.
|
5
|
+
class DateDimensionBuilder
|
6
|
+
# Specify the start date for the first record
|
7
|
+
attr_accessor :start_date
|
8
|
+
|
9
|
+
# Specify the end date for the last record
|
10
|
+
attr_accessor :end_date
|
11
|
+
|
12
|
+
# Define any holiday indicators
|
13
|
+
attr_accessor :holiday_indicators
|
14
|
+
|
15
|
+
# Define the weekday indicators. The default array begins on Sunday and goes to Saturday.
|
16
|
+
cattr_accessor :weekday_indicators
|
17
|
+
@@weekday_indicators = ['Weekend','Weekday','Weekday','Weekday','Weekday','Weekday','Weekend']
|
18
|
+
|
19
|
+
# Initialize the builder.
|
20
|
+
#
|
21
|
+
# * <tt>start_date</tt>: The start date. Defaults to 5 years ago from today.
|
22
|
+
# * <tt>end_date</tt>: The end date. Defaults to now.
|
23
|
+
def initialize(start_date=Time.now.years_ago(5), end_date=Time.now)
|
24
|
+
@start_date = start_date
|
25
|
+
@end_date = end_date
|
26
|
+
@holiday_indicators = []
|
27
|
+
end
|
28
|
+
|
29
|
+
# Returns an array of hashes representing records in the dimension. The values for each record are
|
30
|
+
# accessed by name.
|
31
|
+
def build(options={})
|
32
|
+
records = []
|
33
|
+
date = start_date.to_time
|
34
|
+
while date <= end_date.to_time
|
35
|
+
record = {}
|
36
|
+
record[:date] = date.strftime("%m/%d/%Y")
|
37
|
+
record[:full_date_description] = date.strftime("%B %d,%Y")
|
38
|
+
record[:day_of_week] = date.strftime("%A")
|
39
|
+
#record[:day_number_in_epoch] = date.to_i / 24
|
40
|
+
#record[:week_number_in_epoch] = date.to_i / (24 * 7)
|
41
|
+
#record[:month_number_in_epoch] = date.to_i / (24 * 7 * 30)
|
42
|
+
record[:day_number_in_calendar_month] = date.day
|
43
|
+
record[:day_number_in_calendar_year] = date.yday
|
44
|
+
record[:day_number_in_fiscal_month] = date.day # should this be different from CY?
|
45
|
+
record[:day_number_in_fiscal_year] = date.fiscal_year_yday
|
46
|
+
#record[:last_day_in_week_indicator] =
|
47
|
+
#record[:last_day_in_month_indicator] =
|
48
|
+
#record[:calendar_week_ending_date] =
|
49
|
+
record[:calendar_week] = "Week #{date.week}"
|
50
|
+
record[:calendar_week_number_in_year] = date.week
|
51
|
+
record[:calendar_month_name] = date.strftime("%B")
|
52
|
+
record[:calendar_month_number_in_year] = date.month
|
53
|
+
record[:calendar_year_month] = date.strftime("%Y-%m")
|
54
|
+
record[:calendar_quarter] = "Q#{date.quarter}"
|
55
|
+
record[:calendar_quarter_number_in_year] = date.quarter
|
56
|
+
record[:calendar_year_quarter] = "#{date.strftime('%Y')}-#{record[:calendar_quarter]}"
|
57
|
+
#record[:calendar_half_year] =
|
58
|
+
record[:calendar_year] = "#{date.year}"
|
59
|
+
record[:fiscal_week] = "FY Week #{date.fiscal_year_week}"
|
60
|
+
record[:fiscal_week_number_in_year] = date.fiscal_year_week
|
61
|
+
record[:fiscal_month] = date.fiscal_year_month
|
62
|
+
record[:fiscal_month_number_in_year] = date.fiscal_year_month
|
63
|
+
record[:fiscal_year_month] = "FY#{date.fiscal_year}-" + date.fiscal_year_month.to_s.rjust(2, '0')
|
64
|
+
record[:fiscal_quarter] = "FY Q#{date.fiscal_year_quarter}"
|
65
|
+
record[:fiscal_year_quarter] = "FY#{date.fiscal_year}-Q#{date.fiscal_year_quarter}"
|
66
|
+
record[:fiscal_year_quarter_number] = date.fiscal_year_quarter
|
67
|
+
#record[:fiscal_half_year] =
|
68
|
+
record[:fiscal_year] = "FY#{date.fiscal_year}"
|
69
|
+
record[:fiscal_year_number] = date.fiscal_year
|
70
|
+
record[:holiday_indicator] = holiday_indicators.include?(date) ? 'Holiday' : 'Nonholiday'
|
71
|
+
record[:weekday_indicator] = weekday_indicators[date.wday]
|
72
|
+
record[:selling_season] = 'None'
|
73
|
+
record[:major_event] = 'None'
|
74
|
+
record[:sql_date_stamp] = date
|
75
|
+
|
76
|
+
records << record
|
77
|
+
date = date.tomorrow
|
78
|
+
end
|
79
|
+
records
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
data/lib/etl/commands/etl.rb
CHANGED
@@ -24,54 +24,67 @@
|
|
24
24
|
require 'benchmark'
|
25
25
|
require 'getoptlong'
|
26
26
|
|
27
|
-
opts = GetoptLong.new(
|
28
|
-
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
29
|
-
[ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT ],
|
30
|
-
[ '--limit', '-l', GetoptLong::REQUIRED_ARGUMENT ],
|
31
|
-
[ '--offset', '-o', GetoptLong::REQUIRED_ARGUMENT],
|
32
|
-
[ '--newlog', '-n', GetoptLong::NO_ARGUMENT ],
|
33
|
-
[ '--skip-bulk-import', '-s', GetoptLong::NO_ARGUMENT ],
|
34
|
-
[ '--read-locally', GetoptLong::NO_ARGUMENT]
|
35
|
-
)
|
36
|
-
|
37
27
|
# Print a usage statement
|
38
28
|
def usage #:nodoc:
|
39
|
-
puts "Usage: etl
|
29
|
+
puts "Usage: etl file [file file ...]" # TODO: add the command line options
|
40
30
|
end
|
41
31
|
|
42
|
-
|
43
|
-
opts
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
32
|
+
def execute
|
33
|
+
opts = GetoptLong.new(
|
34
|
+
[ '--version', '-v', GetoptLong::NO_ARGUMENT],
|
35
|
+
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
36
|
+
[ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT ],
|
37
|
+
[ '--limit', '-l', GetoptLong::REQUIRED_ARGUMENT ],
|
38
|
+
[ '--offset', '-o', GetoptLong::REQUIRED_ARGUMENT],
|
39
|
+
[ '--newlog', '-n', GetoptLong::NO_ARGUMENT ],
|
40
|
+
[ '--skip-bulk-import', '-s', GetoptLong::NO_ARGUMENT ],
|
41
|
+
[ '--read-locally', GetoptLong::NO_ARGUMENT],
|
42
|
+
[ '--rails-root', GetoptLong::REQUIRED_ARGUMENT]
|
43
|
+
)
|
44
|
+
|
45
|
+
options = {}
|
46
|
+
opts.each do |opt, arg|
|
47
|
+
case opt
|
48
|
+
when '--version'
|
49
|
+
puts "ActiveWarehouse ETL version #{ETL::VERSION::STRING}"
|
50
|
+
return
|
51
|
+
when '--help'
|
52
|
+
usage
|
53
|
+
return
|
54
|
+
when '--config'
|
55
|
+
options[:config] = arg
|
56
|
+
when '--limit'
|
57
|
+
options[:limit] = arg.to_i
|
58
|
+
when '--offset'
|
59
|
+
options[:offset] = arg.to_i
|
60
|
+
when '--newlog'
|
61
|
+
options[:newlog] = true
|
62
|
+
when '--skip-bulk-import'
|
63
|
+
puts "skip bulk import enabled"
|
64
|
+
options[:skip_bulk_import] = true
|
65
|
+
when '--read-locally'
|
66
|
+
puts "read locally enabled"
|
67
|
+
options[:read_locally] = true
|
68
|
+
when '--rails-root'
|
69
|
+
options[:rails_root] = arg
|
70
|
+
puts "rails root set to #{options[:rails_root]}"
|
71
|
+
end
|
61
72
|
end
|
62
|
-
end
|
63
73
|
|
64
|
-
if ARGV.length < 1
|
65
|
-
|
66
|
-
else
|
67
|
-
|
74
|
+
if ARGV.length < 1
|
75
|
+
usage
|
76
|
+
else
|
77
|
+
puts "Starting ETL process"
|
68
78
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
79
|
+
ETL::Engine.init(options)
|
80
|
+
ARGV.each do |f|
|
81
|
+
puts "Processing #{f}"
|
82
|
+
ETL::Engine.realtime_activity = true
|
83
|
+
ETL::Engine.process(f)
|
84
|
+
end
|
75
85
|
|
76
|
-
|
77
|
-
end
|
86
|
+
puts "ETL process complete"
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
execute
|
data/lib/etl/control/control.rb
CHANGED
@@ -2,6 +2,8 @@ module ETL #:nodoc:
|
|
2
2
|
module Control #:nodoc:
|
3
3
|
# The Context is passed to eval.
|
4
4
|
class Context
|
5
|
+
require 'test/unit/assertions'
|
6
|
+
include Test::Unit::Assertions
|
5
7
|
attr_reader :control
|
6
8
|
|
7
9
|
class << self
|
@@ -26,10 +28,11 @@ module ETL #:nodoc:
|
|
26
28
|
control.error_threshold = error_threshold
|
27
29
|
end
|
28
30
|
|
29
|
-
# Define a list of control files that this file depends on. Those control
|
30
|
-
# will be executed prior to this control file. The list may
|
31
|
-
# be converted to file names by calling
|
32
|
-
# case they will be used
|
31
|
+
# Define a list of control files that this file depends on. Those control
|
32
|
+
# files will be executed prior to this control file. The list may
|
33
|
+
# contain symbols that will be converted to file names by calling
|
34
|
+
# to_s + '.ctl', or they may be strings in which case they will be used
|
35
|
+
# as is
|
33
36
|
def depends_on(*args)
|
34
37
|
dependencies << args
|
35
38
|
end
|
@@ -53,7 +56,7 @@ module ETL #:nodoc:
|
|
53
56
|
if configuration[:type].is_a?(ETL::Control::Source)
|
54
57
|
sources << configuration[:type]
|
55
58
|
else
|
56
|
-
raise "
|
59
|
+
raise ControlError, "Type must be a Class, String, Symbol or object extending ETL::Control::Source"
|
57
60
|
end
|
58
61
|
end
|
59
62
|
else
|
@@ -61,7 +64,9 @@ module ETL #:nodoc:
|
|
61
64
|
if configuration[source_type]
|
62
65
|
source_class = ETL::Control::Source.class_for_name(source_type)
|
63
66
|
sources << source_class.new(self, configuration, definition)
|
67
|
+
break
|
64
68
|
end
|
69
|
+
raise ControlError, "A source was specified but no matching type was found"
|
65
70
|
end
|
66
71
|
end
|
67
72
|
end
|
@@ -73,10 +78,29 @@ module ETL #:nodoc:
|
|
73
78
|
|
74
79
|
# Define a destination
|
75
80
|
def destination(name, configuration={}, mapping={})
|
76
|
-
|
77
|
-
|
78
|
-
|
81
|
+
if configuration[:type]
|
82
|
+
case configuration[:type]
|
83
|
+
when Class
|
84
|
+
dest_class = configuration[:type]
|
79
85
|
destinations << dest_class.new(self, configuration, mapping)
|
86
|
+
when String, Symbol
|
87
|
+
dest_class = ETL::Control::Destination.class_for_name(configuration[:type])
|
88
|
+
destinations << dest_class.new(self, configuration, mapping)
|
89
|
+
else
|
90
|
+
if configuration[:type].is_a?(ETL::Control::Destination)
|
91
|
+
destinations << configuration[:type]
|
92
|
+
else
|
93
|
+
raise ControlError, "Type must be a Class, String, Symbol or object extending ETL::Control::Destination"
|
94
|
+
end
|
95
|
+
end
|
96
|
+
else
|
97
|
+
destination_types.each do |dest_type|
|
98
|
+
if configuration[dest_type]
|
99
|
+
dest_class = ETL::Control::Destination.class_for_name(dest_type)
|
100
|
+
destinations << dest_class.new(self, configuration, mapping)
|
101
|
+
break
|
102
|
+
end
|
103
|
+
raise ControlError, "A destination was specified but no matching destination type was found"
|
80
104
|
end
|
81
105
|
end
|
82
106
|
end
|
@@ -121,6 +145,17 @@ module ETL #:nodoc:
|
|
121
145
|
control.transforms
|
122
146
|
end
|
123
147
|
|
148
|
+
# Define a screen block. The type argument must be one of :fatal, :error
|
149
|
+
# or :warn
|
150
|
+
def screen(type, &block)
|
151
|
+
screens[type] << block
|
152
|
+
end
|
153
|
+
|
154
|
+
# Get the screen blocks
|
155
|
+
def screens
|
156
|
+
control.screens
|
157
|
+
end
|
158
|
+
|
124
159
|
# Rename the source field to the destination field
|
125
160
|
def rename(source, destination)
|
126
161
|
after_read :rename, :source => source, :dest => destination
|
@@ -222,7 +257,6 @@ module ETL #:nodoc:
|
|
222
257
|
# Parse a control file and return a Control instance
|
223
258
|
def parse(control_file)
|
224
259
|
control_file = control_file.path if control_file.instance_of?(File)
|
225
|
-
# logger.debug "Parsing control file #{control_file.path}"
|
226
260
|
control = ETL::Control::Control.new(control_file)
|
227
261
|
# TODO: better handling of parser errors. Return the line in the control file where the error occurs.
|
228
262
|
eval(IO.readlines(control_file).join("\n"), Context.create(control), control_file)
|
@@ -230,6 +264,13 @@ module ETL #:nodoc:
|
|
230
264
|
control
|
231
265
|
end
|
232
266
|
|
267
|
+
def parse_text(text)
|
268
|
+
control = ETL::Control::Control.new(nil)
|
269
|
+
eval(text, Context.create(control), 'inline')
|
270
|
+
control.validate
|
271
|
+
control
|
272
|
+
end
|
273
|
+
|
233
274
|
# Resolve the given object to an ETL::Control::Control instance. Acceptable arguments
|
234
275
|
# are:
|
235
276
|
# * The path to a control file as a String
|
@@ -300,6 +341,14 @@ module ETL #:nodoc:
|
|
300
341
|
@transforms ||= []
|
301
342
|
end
|
302
343
|
|
344
|
+
def screens
|
345
|
+
@screens ||= {
|
346
|
+
:fatal => [],
|
347
|
+
:error => [],
|
348
|
+
:warn => []
|
349
|
+
}
|
350
|
+
end
|
351
|
+
|
303
352
|
# Get the error threshold. Defaults to 100.
|
304
353
|
def error_threshold
|
305
354
|
@error_threshold ||= 100
|
data/lib/etl/control/destination.rb
CHANGED
@@ -157,7 +157,12 @@ module ETL #:nodoc:
|
|
157
157
|
|
158
158
|
# Get the dimension table if specified
|
159
159
|
def dimension_table
|
160
|
-
configuration[:scd][:dimension_table] if scd?
|
160
|
+
ETL::Engine.table(configuration[:scd][:dimension_table], dimension_target) if scd?
|
161
|
+
end
|
162
|
+
|
163
|
+
# Get the dimension target if specified
|
164
|
+
def dimension_target
|
165
|
+
configuration[:scd][:dimension_target] if scd?
|
161
166
|
end
|
162
167
|
|
163
168
|
# Process a row to determine the change type
|
@@ -209,10 +214,16 @@ module ETL #:nodoc:
|
|
209
214
|
|
210
215
|
if scd_type == 2
|
211
216
|
ETL::Engine.logger.debug "type 2 SCD"
|
217
|
+
|
218
|
+
raise ConfigurationError, "dimension_table setting required" unless dimension_table
|
219
|
+
raise ConfigurationError, "dimension_target setting required" unless dimension_target
|
220
|
+
|
221
|
+
conn = ETL::Engine.connection(dimension_target)
|
222
|
+
|
212
223
|
q = "SELECT * FROM #{dimension_table} WHERE "
|
213
224
|
q << natural_key.collect { |nk| "#{nk} = '#{row[nk]}'" }.join(" AND ")
|
214
225
|
#puts "looking for original record"
|
215
|
-
result =
|
226
|
+
result = conn.select_one(q)
|
216
227
|
if result
|
217
228
|
#puts "Result: #{result.inspect}"
|
218
229
|
original_record = ETL::Row[result.symbolize_keys!]
|
@@ -223,6 +234,15 @@ module ETL #:nodoc:
|
|
223
234
|
# need to figure out how to delete that old record before inserting the
|
224
235
|
# updated version of the record
|
225
236
|
|
237
|
+
q = "DELETE FROM #{dimension_table} WHERE "
|
238
|
+
q << natural_key.collect { |nk| "#{nk} = '#{row[nk]}'" }.join(" AND ")
|
239
|
+
|
240
|
+
num_rows_affected = conn.delete(q)
|
241
|
+
ETL::Engine.logger.debug "deleted old row"
|
242
|
+
|
243
|
+
# do this?
|
244
|
+
#raise "Should have deleted a single record" if num_rows_affected != 1
|
245
|
+
|
226
246
|
buffer << original_record
|
227
247
|
end
|
228
248
|
|
@@ -239,9 +259,14 @@ module ETL #:nodoc:
|
|
239
259
|
else
|
240
260
|
ETL::Engine.logger.debug "CRC matches, skipping"
|
241
261
|
|
262
|
+
raise ConfigurationError, "dimension_table setting required" unless dimension_table
|
263
|
+
raise ConfigurationError, "dimension_target setting required" unless dimension_target
|
264
|
+
|
265
|
+
conn = ETL::Engine.connection(dimension_target)
|
266
|
+
|
242
267
|
q = "SELECT * FROM #{dimension_table} WHERE "
|
243
268
|
q << natural_key.collect { |nk| "#{nk} = '#{row[nk]}'" }.join(" AND ")
|
244
|
-
result =
|
269
|
+
result = conn.select_one(q)
|
245
270
|
if result
|
246
271
|
# This was necessary when truncating and then loading, however I
|
247
272
|
# am getting reluctant to having the ETL process do the truncation
|
@@ -297,7 +322,7 @@ module ETL #:nodoc:
|
|
297
322
|
generator = generators[key] ||= value.new
|
298
323
|
row[key] = generator.next
|
299
324
|
when Symbol
|
300
|
-
generator = generators[key] ||= ETL::Generator::Generator.class_for_name(value).new
|
325
|
+
generator = generators[key] ||= ETL::Generator::Generator.class_for_name(value).new(options)
|
301
326
|
row[key] = generator.next
|
302
327
|
when Proc
|
303
328
|
row[key] = value.call(row)
|
data/lib/etl/control/destination/database_destination.rb
CHANGED
@@ -5,6 +5,12 @@ module ETL #:nodoc:
|
|
5
5
|
# loader if it is supported with your target database as it will use a much faster load
|
6
6
|
# method.
|
7
7
|
class DatabaseDestination < Destination
|
8
|
+
# The target connection
|
9
|
+
attr_reader :target
|
10
|
+
|
11
|
+
# The table
|
12
|
+
attr_reader :table
|
13
|
+
|
8
14
|
# Specify the order from the source
|
9
15
|
attr_reader :order
|
10
16
|
|
@@ -19,31 +25,31 @@ module ETL #:nodoc:
|
|
19
25
|
#
|
20
26
|
# Configuration options:
|
21
27
|
# * <tt>:database</tt>: The database name (REQUIRED)
|
28
|
+
# * <tt>:target</tt>: The target connection (REQUIRED)
|
22
29
|
# * <tt>:table</tt>: The table to write to (REQUIRED)
|
23
30
|
# * <tt>:truncate</tt>: Set to true to truncate before writing (defaults to false)
|
24
31
|
# * <tt>:unique</tt>: Set to true to only insert unique records (defaults to false)
|
25
|
-
# * <tt>:adapter</tt>: The adapter to use (defaults to :mysql)
|
26
|
-
# * <tt>:username</tt>: The database username (defaults to 'root')
|
27
|
-
# * <tt>:password</tt>: The password to the database (defaults to nothing)
|
28
|
-
# * <tt>:host</tt>: The host for the database (defaults to 'localhost')
|
29
32
|
# * <tt>:append_rows</tt>: Array of rows to append
|
30
33
|
#
|
31
34
|
# Mapping options:
|
32
35
|
# * <tt>:order</tt>: The order of fields to write (REQUIRED)
|
33
36
|
def initialize(control, configuration, mapping={})
|
34
37
|
super
|
38
|
+
@target = configuration[:target]
|
39
|
+
@table = configuration[:table]
|
35
40
|
@truncate = configuration[:truncate] ||= false
|
36
41
|
@unique = configuration[:unique]
|
37
42
|
@order = mapping[:order] || order_from_source
|
38
43
|
raise ControlError, "Order required in mapping" unless @order
|
39
|
-
|
44
|
+
raise ControlError, "Table required" unless @table
|
45
|
+
raise ControlError, "Target required" unless @target
|
40
46
|
end
|
41
47
|
|
42
48
|
# Flush the currently buffered data
|
43
49
|
def flush
|
44
|
-
conn = ETL::
|
50
|
+
conn = ETL::Engine.connection(target)
|
45
51
|
conn.transaction do
|
46
|
-
conn.truncate(
|
52
|
+
conn.truncate(table_name) if truncate
|
47
53
|
|
48
54
|
buffer.flatten.each do |row|
|
49
55
|
# check to see if this row's compound key constraint already exists
|
@@ -59,7 +65,7 @@ module ETL #:nodoc:
|
|
59
65
|
names << name
|
60
66
|
values << conn.quote(row[name]) # TODO: this is probably not database agnostic
|
61
67
|
end
|
62
|
-
q = "INSERT INTO #{
|
68
|
+
q = "INSERT INTO #{table_name} (#{names.join(',')}) VALUES (#{values.join(',')})"
|
63
69
|
ETL::Engine.logger.debug("Executing insert: #{q}")
|
64
70
|
conn.insert(q, "Insert row #{current_row}")
|
65
71
|
@current_row += 1
|
@@ -72,29 +78,13 @@ module ETL #:nodoc:
|
|
72
78
|
def close
|
73
79
|
buffer << append_rows if append_rows
|
74
80
|
flush
|
75
|
-
ETL::ActiveRecord::Base.connection.disconnect!
|
76
81
|
end
|
77
82
|
|
78
83
|
private
|
79
|
-
|
80
|
-
|
81
|
-
# Required options:
|
82
|
-
# * <tt>:database</tt>: The database name
|
83
|
-
#
|
84
|
-
# Options:
|
85
|
-
# * <tt>:adapter</tt>: The adapter to use (defaults to :mysql)
|
86
|
-
# * <tt>:username</tt>: The database username (defaults to 'root')
|
87
|
-
# * <tt>:password</tt>: The password to the database (defaults to nothing)
|
88
|
-
# * <tt>:host<tt>: The host for the database (defaults to 'localhost')
|
89
|
-
def connect
|
90
|
-
ETL::ActiveRecord::Base.establish_connection(
|
91
|
-
:adapter => (configuration[:adapter] || :mysql),
|
92
|
-
:username => (configuration[:username] || 'root'),
|
93
|
-
:host => (configuration[:host] || 'localhost'),
|
94
|
-
:password => configuration[:password],
|
95
|
-
:database => configuration[:database]
|
96
|
-
)
|
84
|
+
def table_name
|
85
|
+
ETL::Engine.table(table, ETL::Engine.connection(target))
|
97
86
|
end
|
87
|
+
|
98
88
|
end
|
99
89
|
end
|
100
90
|
end
|