bricolage 5.15.2 → 5.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/jobclass/my-import.rb +79 -0
- data/jobclass/streaming_load.rb +3 -11
- data/lib/bricolage/mysqldatasource.rb +100 -0
- data/lib/bricolage/postgresconnection.rb +81 -47
- data/lib/bricolage/psqldatasource.rb +2 -28
- data/lib/bricolage/redisdatasource.rb +89 -72
- data/lib/bricolage/version.rb +1 -1
- data/libexec/mys3dump.jar +0 -0
- data/test/home/Gemfile.lock +1 -1
- data/test/home/config/development/database.yml +2 -2
- data/test/home/config.yml +13 -0
- data/test/home/subsys/my-import.job +32 -0
- data/test/home/subsys/users.ct +13 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2660120a7623731fb14dac13b2efa0582e535265
+  data.tar.gz: 859704e487ea718d76bc4567d4e46451acad39db
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a68f420cbfbc54ad9596200c3dc00204e4c0105b68051e06eeb256c7096bc2334cd9dceb678c3b997efe09dca22103ea6545845b76a8824c6eea69fb4779d9cc
+  data.tar.gz: a39835ca04114f75c778c9c129b8ab7e08ea8d9601dcd45504c2fbac84bb5ac286914cc55db308ad49cef94d55de61d58682e23537a0d595a8916e003ded9891
data/jobclass/my-import.rb
ADDED
@@ -0,0 +1,79 @@
+require 'bricolage/psqldatasource'
+require 'bricolage/mysqldatasource'
+
+JobClass.define('my-import') {
+  parameters {|params|
+    # S3Export
+    params.add SrcTableParam.new(optional: false)
+    params.add DataSourceParam.new('mysql', 'src-ds', 'Source data source.')
+    params.add SQLFileParam.new(optional: true)
+    params.add DataSourceParam.new('s3', 's3-ds', 'Temporary file storage.')
+    params.add DestFileParam.new('s3-prefix', 'PREFIX', 'Temporary S3 prefix.')
+    params.add KeyValuePairsParam.new('dump-options', 'KEY:VALUE', 'dump options.', optional: true)
+
+    # Load
+    params.add DestTableParam.new(optional: false)
+    params.add DataSourceParam.new('sql', 'dest-ds', 'Destination data source.')
+    params.add KeyValuePairsParam.new('options', 'OPTIONS', 'Loader options.',
+                                      optional: true, default: PSQLLoadOptions.new,
+                                      value_handler: lambda {|value, ctx, vars| PSQLLoadOptions.parse(value) })
+    params.add SQLFileParam.new('table-def', 'PATH', 'Create table file.')
+
+    # Misc
+    params.add OptionalBoolParam.new('analyze', 'ANALYZE table after SQL is executed.', default: true)
+    params.add OptionalBoolParam.new('vacuum', 'VACUUM table after SQL is executed.')
+    params.add OptionalBoolParam.new('vacuum-sort', 'VACUUM SORT table after SQL is executed.')
+    params.add KeyValuePairsParam.new('grant', 'KEY:VALUE', 'GRANT table after SQL is executed. (required keys: privilege, to)')
+
+    # All
+    params.add OptionalBoolParam.new('export', 'Runs EXPORT task.')
+    params.add OptionalBoolParam.new('put', 'Runs PUT task.')
+    params.add OptionalBoolParam.new('load', 'Runs LOAD task.')
+    params.add OptionalBoolParam.new('gzip', 'Compress Temporary files.')
+  }
+
+  script {|params, script|
+    run_all = !params['export'] && !params['put'] && !params['load']
+
+    # S3Export
+    if params['export'] || run_all
+      script.task(params['src-ds']) {|task|
+        task.s3export params['src-tables'].keys.first,
+                      params['sql-file'],
+                      params['s3-ds'],
+                      params['s3-prefix'],
+                      params['gzip'],
+                      dump_options: params['dump-options']
+      }
+    end
+
+    # Load
+    if params['load'] || run_all
+      script.task(params['dest-ds']) {|task|
+        prev_table = '${dest_table}_old'
+        work_table = '${dest_table}_wk'
+
+        # CREATE
+        task.drop_force prev_table
+        task.drop_force work_table
+        task.exec params['table-def'].replace(/\$\{?dest_table\}?\b/, work_table)
+
+        # COPY
+        task.load params['s3-ds'], params['s3-prefix'], work_table,
+                  'json', nil, params['options'].merge('gzip' => params['gzip'])
+
+        # VACUUM, ANALYZE, GRANT
+        task.vacuum_if params['vacuum'], params['vacuum-sort'], work_table
+        task.analyze_if params['analyze'], work_table
+        task.grant_if params['grant'], work_table
+
+        # RENAME
+        task.create_dummy_table '${dest_table}'
+        task.transaction {
+          task.rename_table params['dest-table'].to_s, "#{params['dest-table'].name}_old"
+          task.rename_table work_table, params['dest-table'].name
+        }
+      }
+    end
+  }
+}
data/jobclass/streaming_load.rb
CHANGED
@@ -261,21 +261,13 @@ class StreamingLoadJobClass < RubyJobClass
 
     def create_tmp_log_table(conn, log_url)
       target_table = log_table_wk
-      execute_update conn, "
+      execute_update conn, "truncate #{target_table};"
       execute_update conn, load_log_copy_stmt(target_table, log_url, @src.credential_string)
-
-      yield target_table
-    ensure
-      begin
-        execute_update conn, "drop table #{target_table}"
-      rescue PostgreSQLException => ex
-        @logger.error ex.message + " (ignored)"
-      end
-    end
+      yield target_table
     end
 
     def log_table_wk
-      "#{@log_table}
+      "#{@log_table}_wk"
     end
 
     def load_log_copy_stmt(target_table, log_url, credential_string)
data/lib/bricolage/mysqldatasource.rb
CHANGED
@@ -15,6 +15,8 @@ module Bricolage
       @client = nil
     end
 
+    attr_reader :mysql_options
+
     def host
      @mysql_options[:host]
    end
@@ -172,6 +174,104 @@ module Bricolage
      end
    end
 
+    def s3export(table, stmt, s3ds, prefix, gzip, dump_options)
+      options = dump_options.nil? ? {} : dump_options[:dump_options]
+      add S3Export.new(table, stmt, s3ds, prefix, gzip: gzip,
+                       format: options['format'],
+                       partition_column: options['partition_column'],
+                       partition_number: options['partition_number'],
+                       write_concurrency: options['write_concurrency'],
+                       rotation_size: options['rotation_size'],
+                       delete_objects: options['delete_objects'],
+                       object_key_delimiter: options['object_key_delimiter'])
+    end
+
+    class S3Export < Action
+
+      def initialize(table, stmt, s3ds, prefix, gzip: true,
+                     format: "json",
+                     partition_column: nil,
+                     partition_number: 4,
+                     write_concurrency: 4,
+                     rotation_size: nil,
+                     delete_objects: false,
+                     object_key_delimiter: nil)
+        @table = table
+        @statement = stmt
+        @s3ds = s3ds
+        @prefix = build_prefix @s3ds.prefix, prefix
+        @format = format
+        @gzip = gzip
+        @partition_column = partition_column
+        @partition_number = partition_number
+        @write_concurrency = write_concurrency
+        @rotation_size = rotation_size
+        @delete_objects = delete_objects
+        @object_key_delimiter = object_key_delimiter
+      end
+
+      def run
+        s3export
+        JobResult.success
+      end
+
+      def bind(*args)
+        @statement.bind(*args) if @statement
+      end
+
+      def s3export
+        cmd = build_cmd(command_parameters)
+        ds.logger.info '[CMD] ' + cmd.join(' ')
+        Open3.popen2e(*cmd) do |input, output, thread|
+          input.close
+          output.each do |line|
+            puts line
+          end
+          unless thread.value.success?
+            raise JobFailure, "#{cmd.join(' ')} failed (status #{thread.value.to_i})"
+          end
+        end
+      end
+
+      def command_parameters
+        params = {jar: mys3dump_path.to_s, h: ds.host, P: ds.port.to_s, D: ds.database, u: ds.username, p: ds.password, o: connection_property, t: @table,
+                  'Daws.accessKeyId': @s3ds.access_key, 'Daws.secretKey': @s3ds.secret_key, b: @s3ds.bucket.name, x: @prefix}
+        params[:q] = @statement.stripped_source.chop if @statement
+        params[:f] = @format if @format
+        params[:C] = nil if @gzip
+        params[:c] = @partition_column if @partition_column
+        params[:n] = @partition_number if @partition_number
+        params[:w] = @write_concurrency if @write_concurrency
+        params[:r] = @rotation_size if @rotation_size
+        params[:d] = nil if @delete_objects
+        params[:k] = @object_key_delimiter if @object_key_delimiter
+        params
+      end
+
+      OPTION_MAP = {
+        encoding: 'useUnicode=true&characterEncoding',
+        read_timeout: 'netTimeoutForStreamingResults',
+        connect_timeout: 'connectTimeout',
+        reconnect: 'autoReconnect'
+      }
+
+      def connection_property
+        ds.mysql_options.map {|k, v| opt = OPTION_MAP[k] ; opt ? "#{opt}=#{v}" : nil }.compact.join('&')
+      end
+
+      def build_prefix(ds_prefix, pm_prefix)
+        ((ds_prefix || "") + "//" + (pm_prefix.to_s || "")).gsub(%r<\A/>, '').gsub(%r<//>, '/')
+      end
+
+      def mys3dump_path
+        Pathname(__dir__).parent.parent + "libexec/mys3dump.jar"
+      end
+
+      def build_cmd(options)
+        ['java'] + options.flat_map {|k, v| v ? ["-#{k}", v.to_s] : ["-#{k}"] }
+      end
+    end
+
     WRITER_CLASSES = {}
 
     class JSONWriter
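Note (added for this writeup, not part of the gem): a minimal sketch of how the new OPTION_MAP and connection_property translate a data source's mysql_options into the JDBC-style property string handed to mys3dump via the -o parameter. The mysql_options values below are made up for illustration.

    # Illustration only: made-up mysql_options; option_map mirrors OPTION_MAP above.
    option_map = {
      encoding: 'useUnicode=true&characterEncoding',
      read_timeout: 'netTimeoutForStreamingResults',
      connect_timeout: 'connectTimeout',
      reconnect: 'autoReconnect'
    }
    mysql_options = {encoding: 'utf8', connect_timeout: 30, reconnect: true, socket: '/tmp/mysql.sock'}
    prop = mysql_options.map {|k, v| opt = option_map[k]; opt ? "#{opt}=#{v}" : nil }.compact.join('&')
    # => "useUnicode=true&characterEncoding=utf8&connectTimeout=30&autoReconnect=true"
    # (keys with no mapping, such as :socket, are dropped)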
data/lib/bricolage/postgresconnection.rb
CHANGED
@@ -1,5 +1,4 @@
 require 'bricolage/exception'
-require 'securerandom'
 require 'pg'
 
 module Bricolage
@@ -11,14 +10,13 @@ module Bricolage
      @connection = connection
      @ds = ds
      @logger = logger
-      @cursor = nil
    end
 
    def source
      @connection
    end
 
-    def
+    def execute_update(query)
      @logger.info "[#{@ds.name}] #{query}"
      log_elapsed_time {
        rs = @connection.exec(query)
@@ -30,42 +28,97 @@ module Bricolage
      raise PostgreSQLException.wrap(ex)
    end
 
+    alias execute execute_update
+    alias update execute_update
+
+    def select(table, &block)
+      execute_query("select * from #{table}", &block)
+    end
+
    def execute_query(query, &block)
      @logger.info "[#{@ds.name}] #{query}"
-
-
-
-
-
-
-
-
-      @logger.info "[#{@ds.name}] #{declare_cursor}"
-      @connection.exec(declare_cursor)
-      elsif !@cursor.nil? && cursor.nil?
-        raise "Cannot declare new cursor. Cursor in use: #{@cursor}"
-      elsif @cursor != cursor
-        raise "Specified cursor not exists. Specified: #{cursor}, Current: #{@cursor}"
-      end
-      fetch = "fetch #{fetch_size} in #{@cursor}"
-      @logger.info "[#{@ds.name}] #{fetch}" if cursor.nil?
-      yield @connection.exec(fetch)
-      return @cursor
+      rs = log_elapsed_time {
+        @connection.exec(query)
+      }
+      result = yield rs
+      rs.clear
+      result
+    rescue PG::Error => ex
+      raise PostgreSQLException.wrap(ex)
    end
 
-
-
+    alias query execute_query
+
+    def query_batch(query, batch_size = 5000, &block)
+      open_cursor(query) {|cur|
+        cur.each_result_set(batch_size, &block)
+      }
    end
 
-    def
-    @
+    def streaming_execute_query(query, &block)
+      @logger.info "[#{@ds.name}] #{query}"
+      log_elapsed_time {
+        @connection.send_query(query)
+      }
+      @connection.set_single_row_mode
+      while rs = @connection.get_result
+        begin
+          rs.check
+          yield rs
+        ensure
+          rs.clear
+        end
+      end
+    rescue PG::Error => ex
+      raise PostgreSQLException.wrap(ex)
    end
 
    def in_transaction?
      @connection.transaction_status == PG::Constants::PQTRANS_INTRANS
    end
 
-
+    def transaction
+      execute 'begin transaction'
+      yield
+      execute 'commit'
+    end
+
+    def open_cursor(query, name = nil, &block)
+      unless in_transaction?
+        transaction {
+          return open_cursor(query, &block)
+        }
+      end
+      name ||= make_unique_cursor_name
+      execute "declare #{name} cursor for #{query}"
+      yield Cursor.new(name, self, @logger)
+    end
+
+    Thread.current['bricolage_cursor_seq'] = 0
+
+    def make_unique_cursor_name
+      seq = (Thread.current['bricolage_cursor_seq'] += 1)
+      "cur_bric_#{$$}_#{'%X' % Thread.current.object_id}_#{seq}"
+    end
+
+    class Cursor
+      def initialize(name, conn, logger)
+        @name = name
+        @conn = conn
+        @logger = logger
+      end
+
+      attr_reader :name
+
+      def each_result_set(fetch_size = 5000)
+        while true
+          @conn.execute_query("fetch #{fetch_size} in #{@name}") {|rs|
+            return if rs.values.empty?
+            yield rs
+          }
+        end
+      end
+    end
 
    def drop_table(name)
      execute "drop table #{name} cascade;"
@@ -77,12 +130,6 @@ module Bricolage
      @logger.error err.message
    end
 
-    def select(table, &block)
-      query = "select * from #{table}"
-      @logger.info "[#{@ds.name}] #{query}"
-      exec(query, &block)
-    end
-
    def vacuum(table)
      execute "vacuum #{table};"
    end
@@ -105,19 +152,6 @@ module Bricolage
      t = e - b
      @logger.info "#{'%.1f' % t} secs"
    end
-
-    def exec(query, &block)
-      @connection.send_query(query)
-      @connection.set_single_row_mode
-      loop do
-        rs = @connection.get_result or break
-        begin
-          rs.check
-          yield rs
-        ensure
-          rs.clear
-        end
-      end
-    end
  end
+
 end
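Note (added for this writeup, not part of the gem): a usage sketch of the new cursor-based batch API, assuming conn is a Bricolage::PostgresConnection obtained elsewhere and process_row stands for caller code.

    # Fetches the result through a server-side cursor in batches of 1000 rows;
    # open_cursor starts a transaction automatically when none is active.
    conn.query_batch('select * from big_table', 1000) do |rs|
      rs.each do |row|        # rs is one PG::Result holding at most 1000 rows
        process_row(row)      # placeholder for caller code
      end
    end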
data/lib/bricolage/psqldatasource.rb
CHANGED
@@ -102,34 +102,8 @@ module Bricolage
      conn.close if conn
    end
 
-    def
-      open {|conn| conn.
-    end
-
-    def execute_query(query, &block)
-      open {|conn| conn.execute_query(query, &block) }
-    end
-
-    def cursor_transaction(&block)
-      raise "Cursor in use" if cursor_in_transaction?
-      conn = PG::Connection.open(host: @host, port: @port, dbname: @database, user: @user, password: password)
-      @cur_conn = PostgresConnection.new(conn, self, logger)
-      @cur_conn.execute("begin transaction")
-      yield
-    ensure
-      @cur_conn.execute("commit") if cursor_in_transaction?
-      @cur_conn.clear_cursor if @cur_conn
-      @cur_conn = nil
-      conn.close if conn
-    end
-
-    def cursor_execute_query(query, fetch_size: 10000, cursor: nil, &block)
-      raise "Begin transaction before invoking this method" unless cursor_in_transaction?
-      @cur_conn.execute_query_with_cursor(query, fetch_size, cursor, &block)
-    end
-
-    def cursor_in_transaction?
-      @cur_conn && @cur_conn.in_transaction?
+    def query_batch(query, batch_size = 5000, &block)
+      open {|conn| conn.query_batch(query, batch_size, &block) }
    end
 
    def drop_table(name)
data/lib/bricolage/redisdatasource.rb
CHANGED
@@ -22,8 +22,9 @@ module Bricolage
      RedisTask.new(self)
    end
 
-    def
-
+    def open
+      client = Redis.new(host: @host, port: @port, **@options)
+      yield client
    end
  end
 
@@ -38,11 +39,9 @@ module Bricolage
      @table = table
      @query = query
      @key_columns = key_column.split(',').map(&:strip)
-      @prefix = prefix
+      @prefix = prefix || "#{@table.last.schema}_#{@table.last.name}_"
      @encode = encode
      @expire = expire
-      @read_count = 0
-      @write_count = 0
    end
 
    def bind(*args)
@@ -53,84 +52,102 @@ module Bricolage
        @query.stripped_source
      end
 
-      def
-
+      def run
+        logger = ds.logger
+        begin
+          logger.info "Key Pattern: #{@prefix}<#{@key_columns.join('_')}>"
+          logger.info "Encode: #{@encode}"
+          logger.info "Expire: #{@expire}"
+          ds.open {|client|
+            writer = RedisRowWriter.for_encode(@encode).new(client, @prefix, @key_columns)
+            import writer
+          }
+        rescue => ex
+          logger.exception ex
+          raise JobFailure, ex.message
+        end
+        JobResult.success
      end
 
-
-        @src.cursor_transaction {
-          read_count = 0
-          loop do
-            ds.client.pipelined do
-              read_count = read_row do |row|
-                write_row row
-              end
-            end
-            break if read_count == 0
-          end
-        }
-        @cursor = nil
-      end
+      BATCH_SIZE = 5000
 
-      def
-
-      @
-
-
-
-
-
-
-
+      def import(writer)
+        count = 0
+        @src.query_batch(source, BATCH_SIZE) do |rs|
+          writer.pipelined {
+            rs.each do |row|
+              writer.write(row)
+              count += 1
+              ds.logger.info "transfered: #{count} rows" if count % 100_0000 == 0
+            end
+          }
        end
-
+        ds.logger.info "all rows written: #{count} rows"
      end
+    end
+  end
 
-
-
-
-
-
-
-
-      data.each do |field,value|
-        f.push ds.client.hset(key, field, value)
-      end
-    when 'json'
-      f.push ds.client.set(key, JSON.generate(data))
-    else
-      raise %Q("encode: #{type}" is not supported)
-    end
-    f.push ds.client.expire(key, @expire) if @expire
-    @write_count += 1
-    return f
+  class RedisRowWriter
+    def RedisRowWriter.for_encode(encode)
+      case encode
+      when 'hash' then RedisHashRowWriter
+      when 'json' then RedisJSONRowWriter
+      else
+        raise ParameterError, "unsupported Redis encode: #{encode.inspect}"
      end
+    end
 
-
-
-
-
-
-      r.empty? ? {1 => 1} : r
-    end
+    def initialize(client, prefix, key_columns)
+      @client = client
+      @prefix = prefix
+      @key_columns = key_columns
+    end
 
-
-
-    end
+    attr_reader :prefix
+    attr_reader :write_count
 
-
-
-
-
-
-
-
-
-      raise JobFailure, ex.message
-    end
-    ds.logger.info "Rows written: #{@write_count}"
-    JobResult.success
+    def key(row)
+      @prefix + @key_columns.map {|k| row[k] }.join('_')
+    end
+
+    def value_columns(row)
+      r = row.dup
+      @key_columns.each do |key|
+        r.delete(key)
      end
+      r.empty? ? {1 => 1} : r
+    end
+
+    def pipelined(&block)
+      @client.pipelined(&block)
+    end
+
+    def write(row)
+      key = key(row)
+      futures = do_write(key, value_columns(row))
+      futures.push @client.expire(key, @expire) if @expire
+      futures
+    end
+
+    def expire
+      @client.expire(key, @expire)
+    end
+  end
+
+  class RedisHashRowWriter < RedisRowWriter
+    def do_write(key, values)
+      # set a value for each key:field pair
+      values.map {|field, value|
+        @client.hset(key, field, value)
+      }
    end
  end
+
+  class RedisJSONRowWriter < RedisRowWriter
+    def do_write(key, values)
+      future = @client.set(key, JSON.generate(values))
+      [future]
+    end
+  end
+
 end
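Note (added for this writeup, not part of the gem): a sketch of how the new RedisRowWriter classes encode a single row, assuming client is an already connected Redis client.

    writer = RedisRowWriter.for_encode('json').new(client, 'users_', ['id'])
    writer.write('id' => 1, 'name' => 'alice')
    # stores {"name":"alice"} under the key "users_1" via SET;
    # with encode 'hash', each remaining column would be written with HSET instead.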
data/lib/bricolage/version.rb
CHANGED

data/libexec/mys3dump.jar
CHANGED
Binary file
data/test/home/Gemfile.lock
CHANGED

data/test/home/config/development/database.yml
CHANGED
@@ -1,7 +1,7 @@
 sql: &redshift_parameters
   type: psql
-  host: <%= ENV['
-  port: <%= ENV['
+  host: <%= ENV['RSHOST'] || 'dwh.ckpd.co' %>
+  port: <%= ENV['RSPORT'] || 5439 %>
   database: production
   username: <%= ENV['RSUSER'] || 'tabemirudev' %>
   pgpass: <%= user_home_relative_path '.pgpass' %>
data/test/home/config.yml
ADDED
@@ -0,0 +1,13 @@
+event_queue:
+  sqs_url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/log-stream-dev"
+  visibility_timeout: 60
+
+load_queue:
+  sqs_url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/XXXXXXXXXXXXXX"
+  visibility_timeout: 60
+
+url_patterns:
+  -
+    url: "s3://redshift-copy-buffer/development/log_stream/\\d{8}_\\d{4}_\\d+.*\\.gz"
+    schema: "aamine"
+    table: "pv_log"
data/test/home/subsys/my-import.job
ADDED
@@ -0,0 +1,32 @@
+class: my-import
+src-ds: mysql
+src-tables:
+  users: main.users
+s3-ds: s3
+s3-prefix: shimpeko/test-abc-
+gzip: true
+dump-options:
+  partition_column: id
+  partition_number: 8
+  write_concurrency: 16
+  rotation_size: 16000000
+  delete_objects: true
+dest-ds: sql
+dest-table: $test_schema.users
+table-def: users.ct
+options:
+  statupdate: false
+  compupdate: false
+  maxerror: 3
+  acceptinvchars: " "
+  #trimblanks: true
+  #truncatecolumns: true
+  ## datetime
+  #acceptanydate: true
+  #dateformat: "auto"
+  #timeformat: "auto"
+vacuum-sort: true
+analyze: true
+grant:
+  privilege: select
+  to: "$test_group"
data/test/home/subsys/users.ct
ADDED
@@ -0,0 +1,13 @@
+--dest-table: users
+
+create table $dest_table
+( id int encode delta
+, user_name varchar(1000) encode lzo
+, birthday date encode lzo
+, zip varchar(255) encode lzo
+, created_at timestamp encode lzo
+, updated_at timestamp encode lzo
+)
+distkey (id)
+sortkey (id)
+;
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: bricolage
 version: !ruby/object:Gem::Version
-  version: 5.
+  version: 5.16.0
 platform: ruby
 authors:
 - Minero Aoki
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-03-
+date: 2016-03-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: pg
@@ -126,6 +126,7 @@ files:
 - jobclass/insert.rb
 - jobclass/load.rb
 - jobclass/my-export.rb
+- jobclass/my-import.rb
 - jobclass/my-migrate.rb
 - jobclass/noop.rb
 - jobclass/rebuild-drop.rb
@@ -173,12 +174,14 @@ files:
 - lib/bricolage/variables.rb
 - lib/bricolage/version.rb
 - libexec/create-lockfile
+- libexec/mys3dump.jar
 - libexec/sqldump
 - libexec/sqldump.Darwin
 - libexec/sqldump.Linux
 - test/all.rb
 - test/home/Gemfile
 - test/home/Gemfile.lock
+- test/home/config.yml
 - test/home/config/development/database.yml
 - test/home/config/development/password.yml
 - test/home/config/development/variable.yml
@@ -202,6 +205,7 @@ files:
 - test/home/subsys/load_test.ct
 - test/home/subsys/load_test.job
 - test/home/subsys/migrate.job
+- test/home/subsys/my-import.job
 - test/home/subsys/net1.jobnet
 - test/home/subsys/net2.jobnet
 - test/home/subsys/put.job
@@ -216,6 +220,7 @@ files:
 - test/home/subsys/some_view-cv.sql.job
 - test/home/subsys/unified.jobnet
 - test/home/subsys/unified.sql.job
+- test/home/subsys/users.ct
 - test/home/subsys/variable.yml
 - test/test_c_streaming_load.rb
 - test/test_filesystem.rb