bricolage 5.19.0 → 5.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/jobclass/streaming_load.rb +16 -5
- data/lib/bricolage/mysqldatasource.rb +14 -2
- data/lib/bricolage/version.rb +1 -1
- data/libexec/mys3dump.jar +0 -0
- data/test/home/Gemfile.lock +10 -10
- data/test/home/config/development/database.yml +22 -24
- data/test/home/revert.sh +7 -0
- data/test/home/subsys/load_test.job +6 -4
- data/test/home/subsys/my-import.job +2 -4
- metadata +3 -3
- data/test/home/sqltest.rb +0 -16
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 08ee4bdc2cc0df34636a9d370bd8dcc658f1fa5a
|
|
4
|
+
data.tar.gz: a26fcb9a3150e12572bacc4bd98168ebab68888f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5915c56686b6757f8a059aa3d7bd05ae14ff0d2b8d806cca59392698fcd1ffcdf8e574a3b1204208c9b4f7dbefe92a8984165920b4a400b1c282950e3cc7c649
|
|
7
|
+
data.tar.gz: 30c0701ff39c2e15c2723a0fb4aac188a655c0ceea04c73df7c3097594367e316a563b51a62958cd3e3747f7b0b2ac38caaa7dba0081baac7e3751f4337944c5
|
data/jobclass/streaming_load.rb
CHANGED
|
@@ -18,6 +18,8 @@ class StreamingLoadJobClass < RubyJobClass
|
|
|
18
18
|
optional: true, default: Bricolage::PSQLLoadOptions.new,
|
|
19
19
|
value_handler: lambda {|value, ctx, vars| Bricolage::PSQLLoadOptions.parse(value) })
|
|
20
20
|
params.add Bricolage::DataSourceParam.new('s3', 's3-ds', 'S3 data source.')
|
|
21
|
+
params.add Bricolage::StringParam.new('ctl-prefix', 'S3_PREFIX', 'S3 object key prefix for control files. (default: ${queue-path}/ctl)', optional: true)
|
|
22
|
+
params.add Bricolage::OptionalBoolParam.new('keep-ctl', 'Does not delete control files if true.')
|
|
21
23
|
params.add Bricolage::StringParam.new('queue-path', 'S3_PATH', 'S3 path for data file queue.')
|
|
22
24
|
params.add Bricolage::StringParam.new('persistent-path', 'S3_PATH', 'S3 path for persistent data file store.')
|
|
23
25
|
params.add Bricolage::StringParam.new('file-name', 'PATTERN', 'name pattern of target data file.')
|
|
@@ -61,6 +63,7 @@ class StreamingLoadJobClass < RubyJobClass
|
|
|
61
63
|
RedshiftStreamingLoader.new(
|
|
62
64
|
data_source: ds,
|
|
63
65
|
queue: make_s3_queue(params),
|
|
66
|
+
keep_ctl: params['keep-ctl'],
|
|
64
67
|
table: string(params['dest-table']),
|
|
65
68
|
work_table: string(params['work-table']),
|
|
66
69
|
log_table: string(params['log-table']),
|
|
@@ -76,6 +79,7 @@ class StreamingLoadJobClass < RubyJobClass
|
|
|
76
79
|
ds = params['s3-ds']
|
|
77
80
|
S3Queue.new(
|
|
78
81
|
data_source: ds,
|
|
82
|
+
ctl_prefix: (params['ctl-prefix'] || "#{params['queue-path']}/ctl"),
|
|
79
83
|
queue_path: params['queue-path'],
|
|
80
84
|
persistent_path: params['persistent-path'],
|
|
81
85
|
file_name: params['file-name'],
|
|
@@ -88,12 +92,13 @@ class StreamingLoadJobClass < RubyJobClass
|
|
|
88
92
|
end
|
|
89
93
|
|
|
90
94
|
class RedshiftStreamingLoader
|
|
91
|
-
def initialize(data_source:, queue:,
|
|
95
|
+
def initialize(data_source:, queue:, keep_ctl:,
|
|
92
96
|
table:, work_table: nil, log_table: nil, load_options: nil,
|
|
93
97
|
sql: nil,
|
|
94
98
|
logger:, noop: false, load_only: false)
|
|
95
99
|
@ds = data_source
|
|
96
100
|
@src = queue
|
|
101
|
+
@keep_ctl = keep_ctl
|
|
97
102
|
@table = table
|
|
98
103
|
@work_table = work_table
|
|
99
104
|
@log_table = log_table
|
|
@@ -200,13 +205,17 @@ class StreamingLoadJobClass < RubyJobClass
|
|
|
200
205
|
end
|
|
201
206
|
|
|
202
207
|
def create_manifest_file(objects)
|
|
203
|
-
manifest_name = "manifest-#{@job_process_id}.json"
|
|
208
|
+
manifest_name = if @keep_ctl
|
|
209
|
+
"#{Time.now.strftime('%Y/%m/%d')}/#{@job_process_id}-#{@table}.json"
|
|
210
|
+
else
|
|
211
|
+
"manifest-#{@job_process_id}.json"
|
|
212
|
+
end
|
|
204
213
|
@logger.info "creating manifest: #{manifest_name}"
|
|
205
214
|
json = make_manifest_json(objects)
|
|
206
215
|
@logger.info "manifest:\n" + json
|
|
207
216
|
url = @src.put_control_file(manifest_name, json, noop: @noop)
|
|
208
217
|
yield url
|
|
209
|
-
@src.remove_control_file(File.basename(url), noop: @noop)
|
|
218
|
+
@src.remove_control_file(File.basename(url), noop: @noop) unless @keep_ctl
|
|
210
219
|
end
|
|
211
220
|
|
|
212
221
|
def make_manifest_json(objects)
|
|
@@ -359,8 +368,9 @@ class StreamingLoadJobClass < RubyJobClass
|
|
|
359
368
|
class S3Queue
|
|
360
369
|
extend Forwardable
|
|
361
370
|
|
|
362
|
-
def initialize(data_source:, queue_path:, persistent_path:, file_name:, logger:)
|
|
371
|
+
def initialize(data_source:, ctl_prefix:, queue_path:, persistent_path:, file_name:, logger:)
|
|
363
372
|
@ds = data_source
|
|
373
|
+
@ctl_prefix = ctl_prefix
|
|
364
374
|
@queue_path = queue_path
|
|
365
375
|
@persistent_path = persistent_path
|
|
366
376
|
@file_name = file_name
|
|
@@ -399,7 +409,8 @@ class StreamingLoadJobClass < RubyJobClass
|
|
|
399
409
|
end
|
|
400
410
|
|
|
401
411
|
def control_file_path(name)
|
|
402
|
-
"#{queue_path}/ctl/#{name}"
|
|
412
|
+
prefix = @ctl_prefix || "#{queue_path}/ctl"
|
|
413
|
+
"#{prefix}/#{name}"
|
|
403
414
|
end
|
|
404
415
|
|
|
405
416
|
def each(&block)
|
|
data/lib/bricolage/mysqldatasource.rb
CHANGED
|
@@ -183,7 +183,9 @@ module Bricolage
|
|
|
183
183
|
write_concurrency: options['write_concurrency'],
|
|
184
184
|
rotation_size: options['rotation_size'],
|
|
185
185
|
delete_objects: options['delete_objects'],
|
|
186
|
-
object_key_delimiter: options['object_key_delimiter'])
|
|
186
|
+
object_key_delimiter: options['object_key_delimiter'],
|
|
187
|
+
src_zone_offset: options['src_zone_offset'],
|
|
188
|
+
dst_zone_offset: options['dst_zone_offset'])
|
|
187
189
|
end
|
|
188
190
|
|
|
189
191
|
class S3Export < Action
|
|
@@ -195,7 +197,9 @@ module Bricolage
|
|
|
195
197
|
write_concurrency: 4,
|
|
196
198
|
rotation_size: nil,
|
|
197
199
|
delete_objects: false,
|
|
198
|
-
object_key_delimiter: nil)
|
|
200
|
+
object_key_delimiter: nil,
|
|
201
|
+
src_zone_offset: nil,
|
|
202
|
+
dst_zone_offset: nil)
|
|
199
203
|
@table = table
|
|
200
204
|
@statement = stmt
|
|
201
205
|
@s3ds = s3ds
|
|
@@ -208,6 +212,8 @@ module Bricolage
|
|
|
208
212
|
@rotation_size = rotation_size
|
|
209
213
|
@delete_objects = delete_objects
|
|
210
214
|
@object_key_delimiter = object_key_delimiter
|
|
215
|
+
@src_zone_offset = src_zone_offset
|
|
216
|
+
@dst_zone_offset = dst_zone_offset
|
|
211
217
|
end
|
|
212
218
|
|
|
213
219
|
def run
|
|
@@ -250,6 +256,12 @@ module Bricolage
|
|
|
250
256
|
params[:r] = @rotation_size if @rotation_size
|
|
251
257
|
params[:d] = nil if @delete_objects
|
|
252
258
|
params[:k] = @object_key_delimiter if @object_key_delimiter
|
|
259
|
+
if src_zone_offset = @src_zone_offset || ds.mysql_options[:src_zone_offset]
|
|
260
|
+
params[:S] = src_zone_offset
|
|
261
|
+
end
|
|
262
|
+
if dst_zone_offset = @dst_zone_offset || ds.mysql_options[:dst_zone_offset]
|
|
263
|
+
params[:T] = dst_zone_offset
|
|
264
|
+
end
|
|
253
265
|
params
|
|
254
266
|
end
|
|
255
267
|
|
data/lib/bricolage/version.rb
CHANGED
data/libexec/mys3dump.jar
CHANGED
|
Binary file
|
data/test/home/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: ../..
|
|
3
3
|
specs:
|
|
4
|
-
bricolage (5.19.0)
|
|
4
|
+
bricolage (5.19.1)
|
|
5
5
|
aws-sdk (~> 2)
|
|
6
6
|
mysql2
|
|
7
7
|
pg (~> 0.18.0)
|
|
@@ -11,12 +11,12 @@ PATH
|
|
|
11
11
|
GEM
|
|
12
12
|
remote: https://rubygems.org/
|
|
13
13
|
specs:
|
|
14
|
-
aws-sdk (2.6.
|
|
15
|
-
aws-sdk-resources (= 2.6.
|
|
16
|
-
aws-sdk-core (2.6.
|
|
14
|
+
aws-sdk (2.6.14)
|
|
15
|
+
aws-sdk-resources (= 2.6.14)
|
|
16
|
+
aws-sdk-core (2.6.14)
|
|
17
17
|
jmespath (~> 1.0)
|
|
18
|
-
aws-sdk-resources (2.6.
|
|
19
|
-
aws-sdk-core (= 2.6.
|
|
18
|
+
aws-sdk-resources (2.6.14)
|
|
19
|
+
aws-sdk-core (= 2.6.14)
|
|
20
20
|
coderay (1.1.0)
|
|
21
21
|
fluent-logger (0.5.1)
|
|
22
22
|
msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
|
|
@@ -47,10 +47,10 @@ GEM
|
|
|
47
47
|
td-logger (~> 0.3.21)
|
|
48
48
|
yajl-ruby (~> 1.1)
|
|
49
49
|
zip-zip (~> 0.3)
|
|
50
|
-
td-client (0.8.
|
|
51
|
-
httpclient (
|
|
50
|
+
td-client (0.8.84)
|
|
51
|
+
httpclient (>= 2.7)
|
|
52
52
|
json (>= 1.7.6)
|
|
53
|
-
msgpack (>= 0.
|
|
53
|
+
msgpack (>= 0.5.6, < 2)
|
|
54
54
|
td-logger (0.3.25)
|
|
55
55
|
fluent-logger (~> 0.5.0)
|
|
56
56
|
msgpack (>= 0.4.4, < 0.8.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
|
|
@@ -67,4 +67,4 @@ DEPENDENCIES
|
|
|
67
67
|
pry
|
|
68
68
|
|
|
69
69
|
BUNDLED WITH
|
|
70
|
-
1.13.
|
|
70
|
+
1.13.6
|
|
data/test/home/config/development/database.yml
CHANGED
|
@@ -42,16 +42,6 @@ postgres:
|
|
|
42
42
|
encoding: utf8
|
|
43
43
|
sql_log_level: DEBUG
|
|
44
44
|
|
|
45
|
-
td:
|
|
46
|
-
type: td
|
|
47
|
-
database: logs
|
|
48
|
-
apikey: <%= password 'td_tabemiru' %>
|
|
49
|
-
|
|
50
|
-
td_search_log:
|
|
51
|
-
type: td
|
|
52
|
-
database: search_log
|
|
53
|
-
apikey: <%= password 'td_tabemiru' %>
|
|
54
|
-
|
|
55
45
|
s3:
|
|
56
46
|
type: s3
|
|
57
47
|
endpoint: "s3-ap-northeast-1.amazonaws.com"
|
|
@@ -74,27 +64,35 @@ mysql:
|
|
|
74
64
|
password: <%= password 'mysql_shared_work_readonly' %>
|
|
75
65
|
encoding: utf8
|
|
76
66
|
|
|
77
|
-
|
|
78
|
-
type:
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
prefix: tmp/strload
|
|
83
|
-
access_key_id: "<%= password 'aws_access_key_id' %>"
|
|
84
|
-
secret_access_key: "<%= password 'aws_secret_access_key' %>"
|
|
67
|
+
sqs_preproc:
|
|
68
|
+
type: sqs
|
|
69
|
+
url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/bricolage-preproc-dev"
|
|
70
|
+
max_number_of_messages: 10
|
|
71
|
+
visibility_timeout: 60
|
|
85
72
|
|
|
86
|
-
|
|
73
|
+
sqs_dispatch:
|
|
87
74
|
type: sqs
|
|
88
75
|
url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/log-stream-dev"
|
|
89
76
|
max_number_of_messages: 10
|
|
90
77
|
visibility_timeout: 60
|
|
91
|
-
access_key_id: "<%= password 'aws_access_key_id' %>"
|
|
92
|
-
secret_access_key: "<%= password 'aws_secret_access_key' %>"
|
|
93
78
|
|
|
94
|
-
|
|
79
|
+
sqs_task:
|
|
95
80
|
type: sqs
|
|
96
81
|
url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/bricolage-load-tasks-dev"
|
|
97
82
|
max_number_of_messages: 1
|
|
98
83
|
visibility_timeout: 180
|
|
99
|
-
|
|
100
|
-
|
|
84
|
+
|
|
85
|
+
sns:
|
|
86
|
+
type: sns
|
|
87
|
+
region: "ap-northeast-1"
|
|
88
|
+
topic_arn: "arn:aws:sns:ap-northeast-1:789035092620:dwh-service-notification"
|
|
89
|
+
|
|
90
|
+
td:
|
|
91
|
+
type: td
|
|
92
|
+
database: logs
|
|
93
|
+
apikey: <%= password 'td_tabemiru' %>
|
|
94
|
+
|
|
95
|
+
td_search_log:
|
|
96
|
+
type: td
|
|
97
|
+
database: search_log
|
|
98
|
+
apikey: <%= password 'td_tabemiru' %>
|
data/test/home/revert.sh
ADDED
|
data/test/home/subsys/load_test.job
CHANGED
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
class: streaming_load
|
|
2
2
|
|
|
3
3
|
s3-ds: s3
|
|
4
|
+
ctl-prefix: ctl
|
|
5
|
+
keep-ctl: true
|
|
4
6
|
queue-path: queue
|
|
5
|
-
persistent-path: save
|
|
7
|
+
persistent-path: save/%Y/%m/%d
|
|
6
8
|
file-name: "%Y%m%d-%H%M_%Q.txt"
|
|
7
9
|
|
|
8
10
|
redshift-ds: sql
|
|
9
|
-
dest-table: load_test
|
|
10
|
-
work-table: load_test_wk
|
|
11
|
-
log-table: load_test_l
|
|
11
|
+
dest-table: $test_schema.load_test
|
|
12
|
+
work-table: $test_schema.load_test_wk
|
|
13
|
+
log-table: $test_schema.load_test_l
|
|
12
14
|
load-options: "delimiter '\\t'"
|
|
data/test/home/subsys/my-import.job
CHANGED
|
@@ -6,10 +6,8 @@ s3-ds: s3
|
|
|
6
6
|
s3-prefix: shimpeko/test-abc-
|
|
7
7
|
gzip: true
|
|
8
8
|
dump-options:
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
write_concurrency: 16
|
|
12
|
-
rotation_size: 16000000
|
|
9
|
+
src_zone_offset: "+00:00"
|
|
10
|
+
dst_zone_offset: "+09:00"
|
|
13
11
|
delete_objects: true
|
|
14
12
|
dest-ds: sql
|
|
15
13
|
dest-table: $test_schema.users
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bricolage
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 5.19.0
|
|
4
|
+
version: 5.19.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Minero Aoki
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-
|
|
11
|
+
date: 2016-11-11 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: pg
|
|
@@ -207,7 +207,7 @@ files:
|
|
|
207
207
|
- test/home/data/20141002-1355_02.txt
|
|
208
208
|
- test/home/data/test.txt
|
|
209
209
|
- test/home/jobnet-test.rb
|
|
210
|
-
- test/home/sqltest.rb
|
|
210
|
+
- test/home/revert.sh
|
|
211
211
|
- test/home/subsys/d.ct
|
|
212
212
|
- test/home/subsys/insert.sql.job
|
|
213
213
|
- test/home/subsys/job1.job
|
data/test/home/sqltest.rb
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
require 'bricolage/commandlineapplication'
|
|
2
|
-
require 'pp'
|
|
3
|
-
|
|
4
|
-
app = Bricolage::CommandLineApplication.define {|opts|
|
|
5
|
-
opts.data_source_option('--ds', 'Target data source', short: '-D', kind: 'sql')
|
|
6
|
-
}
|
|
7
|
-
ds = app.data_source('--ds')
|
|
8
|
-
ds.open {|conn|
|
|
9
|
-
#task_ids = conn.query_values('select task_id from strload_tasks')
|
|
10
|
-
#p [task_ids.class, task_ids.size]
|
|
11
|
-
|
|
12
|
-
conn.update('create table t (x int)')
|
|
13
|
-
pp conn.query_value('select count(*) from t')
|
|
14
|
-
conn.update('drop table t')
|
|
15
|
-
pp conn.query_value('select count(*) from t')
|
|
16
|
-
}
|