bricolage 5.19.0 → 5.19.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/jobclass/streaming_load.rb +16 -5
- data/lib/bricolage/mysqldatasource.rb +14 -2
- data/lib/bricolage/version.rb +1 -1
- data/libexec/mys3dump.jar +0 -0
- data/test/home/Gemfile.lock +10 -10
- data/test/home/config/development/database.yml +22 -24
- data/test/home/revert.sh +7 -0
- data/test/home/subsys/load_test.job +6 -4
- data/test/home/subsys/my-import.job +2 -4
- metadata +3 -3
- data/test/home/sqltest.rb +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 08ee4bdc2cc0df34636a9d370bd8dcc658f1fa5a
|
4
|
+
data.tar.gz: a26fcb9a3150e12572bacc4bd98168ebab68888f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5915c56686b6757f8a059aa3d7bd05ae14ff0d2b8d806cca59392698fcd1ffcdf8e574a3b1204208c9b4f7dbefe92a8984165920b4a400b1c282950e3cc7c649
|
7
|
+
data.tar.gz: 30c0701ff39c2e15c2723a0fb4aac188a655c0ceea04c73df7c3097594367e316a563b51a62958cd3e3747f7b0b2ac38caaa7dba0081baac7e3751f4337944c5
|
data/jobclass/streaming_load.rb
CHANGED
@@ -18,6 +18,8 @@ class StreamingLoadJobClass < RubyJobClass
|
|
18
18
|
optional: true, default: Bricolage::PSQLLoadOptions.new,
|
19
19
|
value_handler: lambda {|value, ctx, vars| Bricolage::PSQLLoadOptions.parse(value) })
|
20
20
|
params.add Bricolage::DataSourceParam.new('s3', 's3-ds', 'S3 data source.')
|
21
|
+
params.add Bricolage::StringParam.new('ctl-prefix', 'S3_PREFIX', 'S3 object key prefix for control files. (default: ${queue-path}/ctl)', optional: true)
|
22
|
+
params.add Bricolage::OptionalBoolParam.new('keep-ctl', 'Does not delete control files if true.')
|
21
23
|
params.add Bricolage::StringParam.new('queue-path', 'S3_PATH', 'S3 path for data file queue.')
|
22
24
|
params.add Bricolage::StringParam.new('persistent-path', 'S3_PATH', 'S3 path for persistent data file store.')
|
23
25
|
params.add Bricolage::StringParam.new('file-name', 'PATTERN', 'name pattern of target data file.')
|
@@ -61,6 +63,7 @@ class StreamingLoadJobClass < RubyJobClass
|
|
61
63
|
RedshiftStreamingLoader.new(
|
62
64
|
data_source: ds,
|
63
65
|
queue: make_s3_queue(params),
|
66
|
+
keep_ctl: params['keep-ctl'],
|
64
67
|
table: string(params['dest-table']),
|
65
68
|
work_table: string(params['work-table']),
|
66
69
|
log_table: string(params['log-table']),
|
@@ -76,6 +79,7 @@ class StreamingLoadJobClass < RubyJobClass
|
|
76
79
|
ds = params['s3-ds']
|
77
80
|
S3Queue.new(
|
78
81
|
data_source: ds,
|
82
|
+
ctl_prefix: (params['ctl-prefix'] || "#{params['queue-path']}/ctl"),
|
79
83
|
queue_path: params['queue-path'],
|
80
84
|
persistent_path: params['persistent-path'],
|
81
85
|
file_name: params['file-name'],
|
@@ -88,12 +92,13 @@ class StreamingLoadJobClass < RubyJobClass
|
|
88
92
|
end
|
89
93
|
|
90
94
|
class RedshiftStreamingLoader
|
91
|
-
def initialize(data_source:, queue:,
|
95
|
+
def initialize(data_source:, queue:, keep_ctl:,
|
92
96
|
table:, work_table: nil, log_table: nil, load_options: nil,
|
93
97
|
sql: nil,
|
94
98
|
logger:, noop: false, load_only: false)
|
95
99
|
@ds = data_source
|
96
100
|
@src = queue
|
101
|
+
@keep_ctl = keep_ctl
|
97
102
|
@table = table
|
98
103
|
@work_table = work_table
|
99
104
|
@log_table = log_table
|
@@ -200,13 +205,17 @@ class StreamingLoadJobClass < RubyJobClass
|
|
200
205
|
end
|
201
206
|
|
202
207
|
def create_manifest_file(objects)
|
203
|
-
manifest_name = "manifest-#{@job_process_id}.json"
|
208
|
+
manifest_name = if @keep_ctl
|
209
|
+
"#{Time.now.strftime('%Y/%m/%d')}/#{@job_process_id}-#{@table}.json"
|
210
|
+
else
|
211
|
+
"manifest-#{@job_process_id}.json"
|
212
|
+
end
|
204
213
|
@logger.info "creating manifest: #{manifest_name}"
|
205
214
|
json = make_manifest_json(objects)
|
206
215
|
@logger.info "manifest:\n" + json
|
207
216
|
url = @src.put_control_file(manifest_name, json, noop: @noop)
|
208
217
|
yield url
|
209
|
-
@src.remove_control_file(File.basename(url), noop: @noop)
|
218
|
+
@src.remove_control_file(File.basename(url), noop: @noop) unless @keep_ctl
|
210
219
|
end
|
211
220
|
|
212
221
|
def make_manifest_json(objects)
|
@@ -359,8 +368,9 @@ class StreamingLoadJobClass < RubyJobClass
|
|
359
368
|
class S3Queue
|
360
369
|
extend Forwardable
|
361
370
|
|
362
|
-
def initialize(data_source:, queue_path:, persistent_path:, file_name:, logger:)
|
371
|
+
def initialize(data_source:, ctl_prefix:, queue_path:, persistent_path:, file_name:, logger:)
|
363
372
|
@ds = data_source
|
373
|
+
@ctl_prefix = ctl_prefix
|
364
374
|
@queue_path = queue_path
|
365
375
|
@persistent_path = persistent_path
|
366
376
|
@file_name = file_name
|
@@ -399,7 +409,8 @@ class StreamingLoadJobClass < RubyJobClass
|
|
399
409
|
end
|
400
410
|
|
401
411
|
def control_file_path(name)
|
402
|
-
"#{queue_path}/ctl/#{name}"
|
412
|
+
prefix = @ctl_prefix || "#{queue_path}/ctl"
|
413
|
+
"#{prefix}/#{name}"
|
403
414
|
end
|
404
415
|
|
405
416
|
def each(&block)
|
data/lib/bricolage/mysqldatasource.rb
CHANGED
@@ -183,7 +183,9 @@ module Bricolage
|
|
183
183
|
write_concurrency: options['write_concurrency'],
|
184
184
|
rotation_size: options['rotation_size'],
|
185
185
|
delete_objects: options['delete_objects'],
|
186
|
-
object_key_delimiter: options['object_key_delimiter']
|
186
|
+
object_key_delimiter: options['object_key_delimiter'],
|
187
|
+
src_zone_offset: options['src_zone_offset'],
|
188
|
+
dst_zone_offset: options['dst_zone_offset'])
|
187
189
|
end
|
188
190
|
|
189
191
|
class S3Export < Action
|
@@ -195,7 +197,9 @@ module Bricolage
|
|
195
197
|
write_concurrency: 4,
|
196
198
|
rotation_size: nil,
|
197
199
|
delete_objects: false,
|
198
|
-
object_key_delimiter: nil
|
200
|
+
object_key_delimiter: nil,
|
201
|
+
src_zone_offset: nil,
|
202
|
+
dst_zone_offset: nil)
|
199
203
|
@table = table
|
200
204
|
@statement = stmt
|
201
205
|
@s3ds = s3ds
|
@@ -208,6 +212,8 @@ module Bricolage
|
|
208
212
|
@rotation_size = rotation_size
|
209
213
|
@delete_objects = delete_objects
|
210
214
|
@object_key_delimiter = object_key_delimiter
|
215
|
+
@src_zone_offset = src_zone_offset
|
216
|
+
@dst_zone_offset = dst_zone_offset
|
211
217
|
end
|
212
218
|
|
213
219
|
def run
|
@@ -250,6 +256,12 @@ module Bricolage
|
|
250
256
|
params[:r] = @rotation_size if @rotation_size
|
251
257
|
params[:d] = nil if @delete_objects
|
252
258
|
params[:k] = @object_key_delimiter if @object_key_delimiter
|
259
|
+
if src_zone_offset = @src_zone_offset || ds.mysql_options[:src_zone_offset]
|
260
|
+
params[:S] = src_zone_offset
|
261
|
+
end
|
262
|
+
if dst_zone_offset = @dst_zone_offset || ds.mysql_options[:dst_zone_offset]
|
263
|
+
params[:T] = dst_zone_offset
|
264
|
+
end
|
253
265
|
params
|
254
266
|
end
|
255
267
|
|
data/lib/bricolage/version.rb
CHANGED
data/libexec/mys3dump.jar
CHANGED
Binary file
|
data/test/home/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: ../..
|
3
3
|
specs:
|
4
|
-
bricolage (5.19.0)
|
4
|
+
bricolage (5.19.1)
|
5
5
|
aws-sdk (~> 2)
|
6
6
|
mysql2
|
7
7
|
pg (~> 0.18.0)
|
@@ -11,12 +11,12 @@ PATH
|
|
11
11
|
GEM
|
12
12
|
remote: https://rubygems.org/
|
13
13
|
specs:
|
14
|
-
aws-sdk (2.6.
|
15
|
-
aws-sdk-resources (= 2.6.
|
16
|
-
aws-sdk-core (2.6.
|
14
|
+
aws-sdk (2.6.14)
|
15
|
+
aws-sdk-resources (= 2.6.14)
|
16
|
+
aws-sdk-core (2.6.14)
|
17
17
|
jmespath (~> 1.0)
|
18
|
-
aws-sdk-resources (2.6.
|
19
|
-
aws-sdk-core (= 2.6.
|
18
|
+
aws-sdk-resources (2.6.14)
|
19
|
+
aws-sdk-core (= 2.6.14)
|
20
20
|
coderay (1.1.0)
|
21
21
|
fluent-logger (0.5.1)
|
22
22
|
msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
|
@@ -47,10 +47,10 @@ GEM
|
|
47
47
|
td-logger (~> 0.3.21)
|
48
48
|
yajl-ruby (~> 1.1)
|
49
49
|
zip-zip (~> 0.3)
|
50
|
-
td-client (0.8.
|
51
|
-
httpclient (
|
50
|
+
td-client (0.8.84)
|
51
|
+
httpclient (>= 2.7)
|
52
52
|
json (>= 1.7.6)
|
53
|
-
msgpack (>= 0.
|
53
|
+
msgpack (>= 0.5.6, < 2)
|
54
54
|
td-logger (0.3.25)
|
55
55
|
fluent-logger (~> 0.5.0)
|
56
56
|
msgpack (>= 0.4.4, < 0.8.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
|
@@ -67,4 +67,4 @@ DEPENDENCIES
|
|
67
67
|
pry
|
68
68
|
|
69
69
|
BUNDLED WITH
|
70
|
-
1.13.
|
70
|
+
1.13.6
|
data/test/home/config/development/database.yml
CHANGED
@@ -42,16 +42,6 @@ postgres:
|
|
42
42
|
encoding: utf8
|
43
43
|
sql_log_level: DEBUG
|
44
44
|
|
45
|
-
td:
|
46
|
-
type: td
|
47
|
-
database: logs
|
48
|
-
apikey: <%= password 'td_tabemiru' %>
|
49
|
-
|
50
|
-
td_search_log:
|
51
|
-
type: td
|
52
|
-
database: search_log
|
53
|
-
apikey: <%= password 'td_tabemiru' %>
|
54
|
-
|
55
45
|
s3:
|
56
46
|
type: s3
|
57
47
|
endpoint: "s3-ap-northeast-1.amazonaws.com"
|
@@ -74,27 +64,35 @@ mysql:
|
|
74
64
|
password: <%= password 'mysql_shared_work_readonly' %>
|
75
65
|
encoding: utf8
|
76
66
|
|
77
|
-
|
78
|
-
type:
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
prefix: tmp/strload
|
83
|
-
access_key_id: "<%= password 'aws_access_key_id' %>"
|
84
|
-
secret_access_key: "<%= password 'aws_secret_access_key' %>"
|
67
|
+
sqs_preproc:
|
68
|
+
type: sqs
|
69
|
+
url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/bricolage-preproc-dev"
|
70
|
+
max_number_of_messages: 10
|
71
|
+
visibility_timeout: 60
|
85
72
|
|
86
|
-
|
73
|
+
sqs_dispatch:
|
87
74
|
type: sqs
|
88
75
|
url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/log-stream-dev"
|
89
76
|
max_number_of_messages: 10
|
90
77
|
visibility_timeout: 60
|
91
|
-
access_key_id: "<%= password 'aws_access_key_id' %>"
|
92
|
-
secret_access_key: "<%= password 'aws_secret_access_key' %>"
|
93
78
|
|
94
|
-
|
79
|
+
sqs_task:
|
95
80
|
type: sqs
|
96
81
|
url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/bricolage-load-tasks-dev"
|
97
82
|
max_number_of_messages: 1
|
98
83
|
visibility_timeout: 180
|
99
|
-
|
100
|
-
|
84
|
+
|
85
|
+
sns:
|
86
|
+
type: sns
|
87
|
+
region: "ap-northeast-1"
|
88
|
+
topic_arn: "arn:aws:sns:ap-northeast-1:789035092620:dwh-service-notification"
|
89
|
+
|
90
|
+
td:
|
91
|
+
type: td
|
92
|
+
database: logs
|
93
|
+
apikey: <%= password 'td_tabemiru' %>
|
94
|
+
|
95
|
+
td_search_log:
|
96
|
+
type: td
|
97
|
+
database: search_log
|
98
|
+
apikey: <%= password 'td_tabemiru' %>
|
data/test/home/revert.sh
ADDED
data/test/home/subsys/load_test.job
CHANGED
@@ -1,12 +1,14 @@
|
|
1
1
|
class: streaming_load
|
2
2
|
|
3
3
|
s3-ds: s3
|
4
|
+
ctl-prefix: ctl
|
5
|
+
keep-ctl: true
|
4
6
|
queue-path: queue
|
5
|
-
persistent-path: save
|
7
|
+
persistent-path: save/%Y/%m/%d
|
6
8
|
file-name: "%Y%m%d-%H%M_%Q.txt"
|
7
9
|
|
8
10
|
redshift-ds: sql
|
9
|
-
dest-table: load_test
|
10
|
-
work-table: load_test_wk
|
11
|
-
log-table: load_test_l
|
11
|
+
dest-table: $test_schema.load_test
|
12
|
+
work-table: $test_schema.load_test_wk
|
13
|
+
log-table: $test_schema.load_test_l
|
12
14
|
load-options: "delimiter '\\t'"
|
data/test/home/subsys/my-import.job
CHANGED
@@ -6,10 +6,8 @@ s3-ds: s3
|
|
6
6
|
s3-prefix: shimpeko/test-abc-
|
7
7
|
gzip: true
|
8
8
|
dump-options:
|
9
|
-
|
10
|
-
|
11
|
-
write_concurrency: 16
|
12
|
-
rotation_size: 16000000
|
9
|
+
src_zone_offset: "+00:00"
|
10
|
+
dst_zone_offset: "+09:00"
|
13
11
|
delete_objects: true
|
14
12
|
dest-ds: sql
|
15
13
|
dest-table: $test_schema.users
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bricolage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.19.0
|
4
|
+
version: 5.19.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Minero Aoki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-11-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pg
|
@@ -207,7 +207,7 @@ files:
|
|
207
207
|
- test/home/data/20141002-1355_02.txt
|
208
208
|
- test/home/data/test.txt
|
209
209
|
- test/home/jobnet-test.rb
|
210
|
-
- test/home/sqltest.rb
|
210
|
+
- test/home/revert.sh
|
211
211
|
- test/home/subsys/d.ct
|
212
212
|
- test/home/subsys/insert.sql.job
|
213
213
|
- test/home/subsys/job1.job
|
data/test/home/sqltest.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
require 'bricolage/commandlineapplication'
|
2
|
-
require 'pp'
|
3
|
-
|
4
|
-
app = Bricolage::CommandLineApplication.define {|opts|
|
5
|
-
opts.data_source_option('--ds', 'Target data source', short: '-D', kind: 'sql')
|
6
|
-
}
|
7
|
-
ds = app.data_source('--ds')
|
8
|
-
ds.open {|conn|
|
9
|
-
#task_ids = conn.query_values('select task_id from strload_tasks')
|
10
|
-
#p [task_ids.class, task_ids.size]
|
11
|
-
|
12
|
-
conn.update('create table t (x int)')
|
13
|
-
pp conn.query_value('select count(*) from t')
|
14
|
-
conn.update('drop table t')
|
15
|
-
pp conn.query_value('select count(*) from t')
|
16
|
-
}
|