bricolage 5.20.0 → 5.20.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/jobclass/my-import-delta.rb +66 -0
- data/lib/bricolage/mysqldatasource.rb +1 -1
- data/lib/bricolage/s3datasource.rb +11 -1
- data/lib/bricolage/version.rb +1 -1
- data/test/home/Gemfile.lock +25 -23
- data/test/home/config/development/database.yml +9 -1
- data/test/home/subsys/item_pv_acc.ct +8 -0
- data/test/home/subsys/my-import-delta.sql.job +40 -0
- data/test/home/subsys/variable.yml +1 -0
- data/test/test_s3datasource.rb +40 -0
- metadata +20 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 952dae79a52b71032a1356582777888388e8727f
|
4
|
+
data.tar.gz: ec3a98ec8bfcdbd2fd26639555cd603794f28700
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 500bda6bef8694fa64285f9a76900f34fbcc14ea1c4169bc3b2c855c708b90837b78e293aa145225bed75a90847f0062e1b91ee8d905ae5dfa70012e152b8caf
|
7
|
+
data.tar.gz: a51c47307e485bf4042a745d1eb0d01b1d1fe1d7daf2e5a55d991fa594069975df2b4a98e2c8591c8204b04a33c8b7a27a8a91ebb3cdaaae0d5eb8b43d1604c9
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'bricolage/psqldatasource'
|
2
|
+
require 'bricolage/mysqldatasource'
|
3
|
+
|
4
|
+
JobClass.define('my-import-delta') {
|
5
|
+
parameters {|params|
|
6
|
+
# S3Export
|
7
|
+
params.add SrcTableParam.new(optional: false)
|
8
|
+
params.add DataSourceParam.new('mysql', 'src-ds', 'Source data source.')
|
9
|
+
params.add SQLFileParam.new(optional: true)
|
10
|
+
params.add DataSourceParam.new('s3', 's3-ds', 'Temporary file storage.')
|
11
|
+
params.add DestFileParam.new('s3-prefix', 'PREFIX', 'Temporary S3 prefix.')
|
12
|
+
params.add KeyValuePairsParam.new('dump-options', 'KEY:VALUE', 'dump options.', optional: true)
|
13
|
+
|
14
|
+
# Delete, Load
|
15
|
+
params.add DataSourceParam.new('sql', 'dest-ds', 'Destination data source.')
|
16
|
+
params.add StringParam.new('delete-cond', 'SQL_EXPR', 'DELETE condition.')
|
17
|
+
params.add DestTableParam.new(optional: false)
|
18
|
+
params.add KeyValuePairsParam.new('options', 'OPTIONS', 'Loader options.',
|
19
|
+
optional: true, default: PSQLLoadOptions.new,
|
20
|
+
value_handler: lambda {|value, ctx, vars| PSQLLoadOptions.parse(value) })
|
21
|
+
|
22
|
+
# Misc
|
23
|
+
params.add OptionalBoolParam.new('analyze', 'ANALYZE table after SQL is executed.', default: true)
|
24
|
+
params.add OptionalBoolParam.new('vacuum', 'VACUUM table after SQL is executed.')
|
25
|
+
params.add OptionalBoolParam.new('vacuum-sort', 'VACUUM SORT table after SQL is executed.')
|
26
|
+
|
27
|
+
# All
|
28
|
+
params.add OptionalBoolParam.new('export', 'Runs EXPORT task.')
|
29
|
+
params.add OptionalBoolParam.new('load', 'Runs LOAD task.')
|
30
|
+
params.add OptionalBoolParam.new('gzip', 'Compress Temporary files.')
|
31
|
+
}
|
32
|
+
|
33
|
+
script {|params, script|
|
34
|
+
run_all = !params['export'] && !params['load']
|
35
|
+
|
36
|
+
# S3Export
|
37
|
+
if params['export'] || run_all
|
38
|
+
script.task(params['src-ds']) {|task|
|
39
|
+
task.s3export params['src-tables'].values.first.to_s,
|
40
|
+
params['sql-file'],
|
41
|
+
params['s3-ds'],
|
42
|
+
params['s3-prefix'],
|
43
|
+
params['gzip'],
|
44
|
+
dump_options: params['dump-options']
|
45
|
+
}
|
46
|
+
end
|
47
|
+
|
48
|
+
# Load
|
49
|
+
if params['load'] || run_all
|
50
|
+
script.task(params['dest-ds']) {|task|
|
51
|
+
task.transaction {
|
52
|
+
# DELETE
|
53
|
+
task.exec SQLStatement.delete_where(params['delete-cond']) if params['delete-cond']
|
54
|
+
|
55
|
+
# COPY
|
56
|
+
task.load params['s3-ds'], params['s3-prefix'], params['dest-table'],
|
57
|
+
'json', nil, params['options'].merge('gzip' => params['gzip'])
|
58
|
+
}
|
59
|
+
|
60
|
+
# VACUUM, ANALYZE
|
61
|
+
task.vacuum_if params['vacuum'], params['vacuum-sort'], params['dest-table']
|
62
|
+
task.analyze_if params['analyze'], params['dest-table']
|
63
|
+
}
|
64
|
+
end
|
65
|
+
}
|
66
|
+
}
|
@@ -247,7 +247,7 @@ module Bricolage
|
|
247
247
|
def command_parameters
|
248
248
|
params = {jar: mys3dump_path.to_s, h: ds.host, P: ds.port.to_s, D: ds.database, u: ds.username, p: ds.password, o: connection_property, t: @table,
|
249
249
|
'Daws.accessKeyId' => @s3ds.access_key, 'Daws.secretKey' => @s3ds.secret_key, b: @s3ds.bucket.name, x: @prefix}
|
250
|
-
params[:q] = @statement.stripped_source.
|
250
|
+
params[:q] = @statement.stripped_source.chomp(';') if @statement
|
251
251
|
params[:f] = @format if @format
|
252
252
|
params[:C] = nil if @gzip
|
253
253
|
params[:c] = @partition_column if @partition_column
|
@@ -125,7 +125,17 @@ module Bricolage
|
|
125
125
|
end
|
126
126
|
|
127
127
|
def traverse(rel, no_prefix: false)
|
128
|
-
|
128
|
+
retries = client.config.retry_limit
|
129
|
+
begin
|
130
|
+
bucket.objects(prefix: path(rel, no_prefix: no_prefix))
|
131
|
+
rescue Aws::Xml::Parser::ParsingError => e
|
132
|
+
retries -= 1
|
133
|
+
if retries >= 0
|
134
|
+
logger.warn "Retry Bucket#objects() for XML parsing error: #{e.message}"
|
135
|
+
retry
|
136
|
+
end
|
137
|
+
raise
|
138
|
+
end
|
129
139
|
end
|
130
140
|
end
|
131
141
|
|
data/lib/bricolage/version.rb
CHANGED
data/test/home/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: ../..
|
3
3
|
specs:
|
4
|
-
bricolage (5.20.0)
|
4
|
+
bricolage (5.20.1)
|
5
5
|
aws-sdk (~> 2)
|
6
6
|
mysql2
|
7
7
|
pg (~> 0.18.0)
|
@@ -11,51 +11,53 @@ PATH
|
|
11
11
|
GEM
|
12
12
|
remote: https://rubygems.org/
|
13
13
|
specs:
|
14
|
-
aws-sdk (2.6.
|
15
|
-
aws-sdk-resources (= 2.6.
|
16
|
-
aws-sdk-core (2.6.
|
14
|
+
aws-sdk (2.6.36)
|
15
|
+
aws-sdk-resources (= 2.6.36)
|
16
|
+
aws-sdk-core (2.6.36)
|
17
|
+
aws-sigv4 (~> 1.0)
|
17
18
|
jmespath (~> 1.0)
|
18
|
-
aws-sdk-resources (2.6.
|
19
|
-
aws-sdk-core (= 2.6.
|
19
|
+
aws-sdk-resources (2.6.36)
|
20
|
+
aws-sdk-core (= 2.6.36)
|
21
|
+
aws-sigv4 (1.0.0)
|
20
22
|
coderay (1.1.0)
|
21
|
-
fluent-logger (0.
|
22
|
-
msgpack (>= 0.
|
23
|
+
fluent-logger (0.6.1)
|
24
|
+
msgpack (>= 0.5.6, < 2)
|
23
25
|
hirb (0.7.3)
|
24
|
-
httpclient (2.8.
|
26
|
+
httpclient (2.8.3)
|
25
27
|
jmespath (1.3.1)
|
26
28
|
json (2.0.2)
|
27
29
|
method_source (0.8.2)
|
28
|
-
msgpack (0.
|
29
|
-
mysql2 (0.4.
|
30
|
+
msgpack (1.0.2)
|
31
|
+
mysql2 (0.4.5)
|
30
32
|
parallel (1.8.0)
|
31
33
|
pg (0.18.4)
|
32
34
|
pry (0.10.3)
|
33
35
|
coderay (~> 1.1.0)
|
34
36
|
method_source (~> 0.8.1)
|
35
37
|
slop (~> 3.4)
|
36
|
-
redis (3.3.
|
37
|
-
ruby-progressbar (1.
|
38
|
+
redis (3.3.2)
|
39
|
+
ruby-progressbar (1.8.1)
|
38
40
|
rubyzip (1.1.7)
|
39
41
|
slop (3.6.0)
|
40
|
-
td (0.15.
|
42
|
+
td (0.15.2)
|
41
43
|
hirb (>= 0.4.5)
|
42
|
-
msgpack
|
44
|
+
msgpack
|
43
45
|
parallel (~> 1.8.0)
|
44
|
-
ruby-progressbar (~> 1.7
|
46
|
+
ruby-progressbar (~> 1.7)
|
45
47
|
rubyzip (~> 1.1.7)
|
46
|
-
td-client (~> 0.8.
|
47
|
-
td-logger (
|
48
|
+
td-client (~> 0.8.85)
|
49
|
+
td-logger (>= 0.3.21, < 2)
|
48
50
|
yajl-ruby (~> 1.1)
|
49
51
|
zip-zip (~> 0.3)
|
50
|
-
td-client (0.8.
|
52
|
+
td-client (0.8.85)
|
51
53
|
httpclient (>= 2.7)
|
52
54
|
json (>= 1.7.6)
|
53
55
|
msgpack (>= 0.5.6, < 2)
|
54
|
-
td-logger (0.3.
|
55
|
-
fluent-logger (
|
56
|
-
msgpack (>= 0.
|
56
|
+
td-logger (0.3.26)
|
57
|
+
fluent-logger (>= 0.5.0, < 2.0)
|
58
|
+
msgpack (>= 0.5.6, < 2.0)
|
57
59
|
td-client (~> 0.8.66)
|
58
|
-
yajl-ruby (1.
|
60
|
+
yajl-ruby (1.3.0)
|
59
61
|
zip-zip (0.3)
|
60
62
|
rubyzip (>= 1.0.0)
|
61
63
|
|
@@ -56,7 +56,6 @@ s3:
|
|
56
56
|
|
57
57
|
mysql:
|
58
58
|
type: mysql
|
59
|
-
#host: db-main-slave-free-001
|
60
59
|
host: 127.0.0.1
|
61
60
|
port: 9011
|
62
61
|
database: main
|
@@ -64,6 +63,15 @@ mysql:
|
|
64
63
|
password: <%= password 'mysql_shared_work_readonly' %>
|
65
64
|
encoding: utf8
|
66
65
|
|
66
|
+
mysql_summary:
|
67
|
+
type: mysql
|
68
|
+
host: 127.0.0.1
|
69
|
+
port: 40201
|
70
|
+
database: summarized_data
|
71
|
+
username: work_readonly
|
72
|
+
password: <%= password 'mysql_shared_work_readonly' %>
|
73
|
+
encoding: utf8
|
74
|
+
|
67
75
|
sqs_preproc:
|
68
76
|
type: sqs
|
69
77
|
url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/bricolage-preproc-dev"
|
@@ -0,0 +1,40 @@
|
|
1
|
+
/*
|
2
|
+
class: my-import-delta
|
3
|
+
src-ds: mysql_summary
|
4
|
+
src-tables:
|
5
|
+
item_pv: summarized_data.daily_recipe_pvs
|
6
|
+
s3-ds: s3
|
7
|
+
s3-prefix: bricolage/my-import-delta/$data_date/data.
|
8
|
+
gzip: true
|
9
|
+
dump-options:
|
10
|
+
partition_column: id
|
11
|
+
delete_objects: true
|
12
|
+
dest-ds: sql
|
13
|
+
dest-table: $test_schema.item_pv_acc
|
14
|
+
delete-cond: "data_date = '$data_date'"
|
15
|
+
options:
|
16
|
+
statupdate: false
|
17
|
+
compupdate: false
|
18
|
+
maxerror: 0
|
19
|
+
#acceptinvchars: " "
|
20
|
+
#trimblanks: true
|
21
|
+
#truncatecolumns: true
|
22
|
+
## datetime
|
23
|
+
#acceptanydate: true
|
24
|
+
#dateformat: "auto"
|
25
|
+
#timeformat: "auto"
|
26
|
+
#vacuum-sort: true
|
27
|
+
analyze: true
|
28
|
+
*/
|
29
|
+
|
30
|
+
select
|
31
|
+
coalesce(`date`) as data_date
|
32
|
+
, coalesce(recipe_id) as item_id
|
33
|
+
, coalesce(page_view) as pv
|
34
|
+
, coalesce(unique_ip) as uu
|
35
|
+
from
|
36
|
+
$item_pv
|
37
|
+
where
|
38
|
+
`date` = date '$data_date'
|
39
|
+
and (@PARTITION_CONDITION@)
|
40
|
+
;
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'mocha/test_unit'
|
3
|
+
require 'bricolage/s3datasource'
|
4
|
+
require 'pp'
|
5
|
+
require 'aws-sdk'
|
6
|
+
|
7
|
+
module Bricolage
|
8
|
+
class TestS3datasouce < Test::Unit::TestCase
|
9
|
+
|
10
|
+
def setup
|
11
|
+
@ds = S3DataSource.new
|
12
|
+
@ds.stubs(:logger).returns(Logger.new)
|
13
|
+
end
|
14
|
+
|
15
|
+
test "traverse without exception" do
|
16
|
+
bucket = mock()
|
17
|
+
bucket.stubs(:objects).returns(true)
|
18
|
+
@ds.stubs(:bucket).returns(bucket)
|
19
|
+
assert @ds.traverse(nil)
|
20
|
+
end
|
21
|
+
|
22
|
+
test "traverse with 2 exception" do
|
23
|
+
bucket = mock()
|
24
|
+
bucket.stubs(:objects).raises(Aws::Xml::Parser::ParsingError.new("test message","0","test column")).then.
|
25
|
+
raises(Aws::Xml::Parser::ParsingError.new("test message","0","test column")).then.returns(true)
|
26
|
+
@ds.stubs(:bucket).returns(bucket)
|
27
|
+
assert @ds.traverse(nil)
|
28
|
+
end
|
29
|
+
|
30
|
+
test "traverse with more than 3 exception" do
|
31
|
+
bucket = mock()
|
32
|
+
bucket.stubs(:objects).raises(Aws::Xml::Parser::ParsingError.new("test message","0","test column")).then.
|
33
|
+
raises(Aws::Xml::Parser::ParsingError.new("test message","0","test column")).then.
|
34
|
+
raises(Aws::Xml::Parser::ParsingError.new("test message","0","test column")).then.
|
35
|
+
raises(Aws::Xml::Parser::ParsingError.new("test message","0","test column")).then.returns(true)
|
36
|
+
@ds.stubs(:bucket).returns(bucket)
|
37
|
+
assert_raise(Aws::Xml::Parser::ParsingError) {@ds.traverse(nil)}
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bricolage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.20.0
|
4
|
+
version: 5.20.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Minero Aoki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-12-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pg
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: mocha
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
125
139
|
description: Redshift-oriented Data Warehouse Batch Framework
|
126
140
|
email: aamine@loveruby.net
|
127
141
|
executables:
|
@@ -140,6 +154,7 @@ files:
|
|
140
154
|
- jobclass/insert.rb
|
141
155
|
- jobclass/load.rb
|
142
156
|
- jobclass/my-export.rb
|
157
|
+
- jobclass/my-import-delta.rb
|
143
158
|
- jobclass/my-import.rb
|
144
159
|
- jobclass/my-migrate.rb
|
145
160
|
- jobclass/noop.rb
|
@@ -210,6 +225,7 @@ files:
|
|
210
225
|
- test/home/revert.sh
|
211
226
|
- test/home/subsys/d.ct
|
212
227
|
- test/home/subsys/insert.sql.job
|
228
|
+
- test/home/subsys/item_pv_acc.ct
|
213
229
|
- test/home/subsys/job1.job
|
214
230
|
- test/home/subsys/job2.job
|
215
231
|
- test/home/subsys/job3.job
|
@@ -221,6 +237,7 @@ files:
|
|
221
237
|
- test/home/subsys/load_test.ct
|
222
238
|
- test/home/subsys/load_test.job
|
223
239
|
- test/home/subsys/migrate.job
|
240
|
+
- test/home/subsys/my-import-delta.sql.job
|
224
241
|
- test/home/subsys/my-import.job
|
225
242
|
- test/home/subsys/net1.jobnet
|
226
243
|
- test/home/subsys/net2.jobnet
|
@@ -241,6 +258,7 @@ files:
|
|
241
258
|
- test/test_c_streaming_load.rb
|
242
259
|
- test/test_filesystem.rb
|
243
260
|
- test/test_parameters.rb
|
261
|
+
- test/test_s3datasource.rb
|
244
262
|
- test/test_variables.rb
|
245
263
|
- test/vacuum-test.rb
|
246
264
|
homepage: https://github.com/aamine/bricolage
|