bricolage 5.20.0 → 5.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bb66877b6bc66492a6091a11eafbd9bb953f95e5
4
- data.tar.gz: a31a4383dad5d6b8b1a319fa3a2d68900f57db55
3
+ metadata.gz: 952dae79a52b71032a1356582777888388e8727f
4
+ data.tar.gz: ec3a98ec8bfcdbd2fd26639555cd603794f28700
5
5
  SHA512:
6
- metadata.gz: c2b402f7f2392a0dec1b4ed72753abed5a9683cf3860b3c6a906d86a1901abbccd9f88d5a4c7d6e52bd6e4ba235e4de0e8ce355971c3d91e064946224f8c91b7
7
- data.tar.gz: c3cf0419ce2a8dc470ed15b9d9837bf8bf32cbfbc440d655813f167d101d2b9ebcb56d21099bd0653e79e44eba85b4062e189179cf4b4ff00e8b98ee1481c2fe
6
+ metadata.gz: 500bda6bef8694fa64285f9a76900f34fbcc14ea1c4169bc3b2c855c708b90837b78e293aa145225bed75a90847f0062e1b91ee8d905ae5dfa70012e152b8caf
7
+ data.tar.gz: a51c47307e485bf4042a745d1eb0d01b1d1fe1d7daf2e5a55d991fa594069975df2b4a98e2c8591c8204b04a33c8b7a27a8a91ebb3cdaaae0d5eb8b43d1604c9
@@ -0,0 +1,66 @@
1
+ require 'bricolage/psqldatasource'
2
+ require 'bricolage/mysqldatasource'
3
+
4
+ JobClass.define('my-import-delta') {
5
+ parameters {|params|
6
+ # S3Export
7
+ params.add SrcTableParam.new(optional: false)
8
+ params.add DataSourceParam.new('mysql', 'src-ds', 'Source data source.')
9
+ params.add SQLFileParam.new(optional: true)
10
+ params.add DataSourceParam.new('s3', 's3-ds', 'Temporary file storage.')
11
+ params.add DestFileParam.new('s3-prefix', 'PREFIX', 'Temporary S3 prefix.')
12
+ params.add KeyValuePairsParam.new('dump-options', 'KEY:VALUE', 'dump options.', optional: true)
13
+
14
+ # Delete, Load
15
+ params.add DataSourceParam.new('sql', 'dest-ds', 'Destination data source.')
16
+ params.add StringParam.new('delete-cond', 'SQL_EXPR', 'DELETE condition.')
17
+ params.add DestTableParam.new(optional: false)
18
+ params.add KeyValuePairsParam.new('options', 'OPTIONS', 'Loader options.',
19
+ optional: true, default: PSQLLoadOptions.new,
20
+ value_handler: lambda {|value, ctx, vars| PSQLLoadOptions.parse(value) })
21
+
22
+ # Misc
23
+ params.add OptionalBoolParam.new('analyze', 'ANALYZE table after SQL is executed.', default: true)
24
+ params.add OptionalBoolParam.new('vacuum', 'VACUUM table after SQL is executed.')
25
+ params.add OptionalBoolParam.new('vacuum-sort', 'VACUUM SORT table after SQL is executed.')
26
+
27
+ # All
28
+ params.add OptionalBoolParam.new('export', 'Runs EXPORT task.')
29
+ params.add OptionalBoolParam.new('load', 'Runs LOAD task.')
30
+ params.add OptionalBoolParam.new('gzip', 'Compress Temporary files.')
31
+ }
32
+
33
+ script {|params, script|
34
+ run_all = !params['export'] && !params['load']
35
+
36
+ # S3Export
37
+ if params['export'] || run_all
38
+ script.task(params['src-ds']) {|task|
39
+ task.s3export params['src-tables'].values.first.to_s,
40
+ params['sql-file'],
41
+ params['s3-ds'],
42
+ params['s3-prefix'],
43
+ params['gzip'],
44
+ dump_options: params['dump-options']
45
+ }
46
+ end
47
+
48
+ # Load
49
+ if params['load'] || run_all
50
+ script.task(params['dest-ds']) {|task|
51
+ task.transaction {
52
+ # DELETE
53
+ task.exec SQLStatement.delete_where(params['delete-cond']) if params['delete-cond']
54
+
55
+ # COPY
56
+ task.load params['s3-ds'], params['s3-prefix'], params['dest-table'],
57
+ 'json', nil, params['options'].merge('gzip' => params['gzip'])
58
+ }
59
+
60
+ # VACUUM, ANALYZE
61
+ task.vacuum_if params['vacuum'], params['vacuum-sort'], params['dest-table']
62
+ task.analyze_if params['analyze'], params['dest-table']
63
+ }
64
+ end
65
+ }
66
+ }
@@ -247,7 +247,7 @@ module Bricolage
247
247
  def command_parameters
248
248
  params = {jar: mys3dump_path.to_s, h: ds.host, P: ds.port.to_s, D: ds.database, u: ds.username, p: ds.password, o: connection_property, t: @table,
249
249
  'Daws.accessKeyId' => @s3ds.access_key, 'Daws.secretKey' => @s3ds.secret_key, b: @s3ds.bucket.name, x: @prefix}
250
- params[:q] = @statement.stripped_source.chop if @statement
250
+ params[:q] = @statement.stripped_source.chomp(';') if @statement
251
251
  params[:f] = @format if @format
252
252
  params[:C] = nil if @gzip
253
253
  params[:c] = @partition_column if @partition_column
@@ -125,7 +125,17 @@ module Bricolage
125
125
  end
126
126
 
127
127
  def traverse(rel, no_prefix: false)
128
- bucket.objects(prefix: path(rel, no_prefix: no_prefix))
128
+ retries = client.config.retry_limit
129
+ begin
130
+ bucket.objects(prefix: path(rel, no_prefix: no_prefix))
131
+ rescue Aws::Xml::Parser::ParsingError => e
132
+ retries -= 1
133
+ if retries >= 0
134
+ logger.warn "Retry Bucket#objects() for XML parsing error: #{e.message}"
135
+ retry
136
+ end
137
+ raise
138
+ end
129
139
  end
130
140
  end
131
141
 
@@ -1,4 +1,4 @@
1
1
  module Bricolage
2
2
  APPLICATION_NAME = 'Bricolage'
3
- VERSION = '5.20.0'
3
+ VERSION = '5.20.1'
4
4
  end
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: ../..
3
3
  specs:
4
- bricolage (5.20.0)
4
+ bricolage (5.20.1)
5
5
  aws-sdk (~> 2)
6
6
  mysql2
7
7
  pg (~> 0.18.0)
@@ -11,51 +11,53 @@ PATH
11
11
  GEM
12
12
  remote: https://rubygems.org/
13
13
  specs:
14
- aws-sdk (2.6.14)
15
- aws-sdk-resources (= 2.6.14)
16
- aws-sdk-core (2.6.14)
14
+ aws-sdk (2.6.36)
15
+ aws-sdk-resources (= 2.6.36)
16
+ aws-sdk-core (2.6.36)
17
+ aws-sigv4 (~> 1.0)
17
18
  jmespath (~> 1.0)
18
- aws-sdk-resources (2.6.14)
19
- aws-sdk-core (= 2.6.14)
19
+ aws-sdk-resources (2.6.36)
20
+ aws-sdk-core (= 2.6.36)
21
+ aws-sigv4 (1.0.0)
20
22
  coderay (1.1.0)
21
- fluent-logger (0.5.1)
22
- msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
23
+ fluent-logger (0.6.1)
24
+ msgpack (>= 0.5.6, < 2)
23
25
  hirb (0.7.3)
24
- httpclient (2.8.2.4)
26
+ httpclient (2.8.3)
25
27
  jmespath (1.3.1)
26
28
  json (2.0.2)
27
29
  method_source (0.8.2)
28
- msgpack (0.5.12)
29
- mysql2 (0.4.4)
30
+ msgpack (1.0.2)
31
+ mysql2 (0.4.5)
30
32
  parallel (1.8.0)
31
33
  pg (0.18.4)
32
34
  pry (0.10.3)
33
35
  coderay (~> 1.1.0)
34
36
  method_source (~> 0.8.1)
35
37
  slop (~> 3.4)
36
- redis (3.3.1)
37
- ruby-progressbar (1.7.5)
38
+ redis (3.3.2)
39
+ ruby-progressbar (1.8.1)
38
40
  rubyzip (1.1.7)
39
41
  slop (3.6.0)
40
- td (0.15.0)
42
+ td (0.15.2)
41
43
  hirb (>= 0.4.5)
42
- msgpack (>= 0.4.4, < 0.8.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
44
+ msgpack
43
45
  parallel (~> 1.8.0)
44
- ruby-progressbar (~> 1.7.5)
46
+ ruby-progressbar (~> 1.7)
45
47
  rubyzip (~> 1.1.7)
46
- td-client (~> 0.8.82)
47
- td-logger (~> 0.3.21)
48
+ td-client (~> 0.8.85)
49
+ td-logger (>= 0.3.21, < 2)
48
50
  yajl-ruby (~> 1.1)
49
51
  zip-zip (~> 0.3)
50
- td-client (0.8.84)
52
+ td-client (0.8.85)
51
53
  httpclient (>= 2.7)
52
54
  json (>= 1.7.6)
53
55
  msgpack (>= 0.5.6, < 2)
54
- td-logger (0.3.25)
55
- fluent-logger (~> 0.5.0)
56
- msgpack (>= 0.4.4, < 0.8.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
56
+ td-logger (0.3.26)
57
+ fluent-logger (>= 0.5.0, < 2.0)
58
+ msgpack (>= 0.5.6, < 2.0)
57
59
  td-client (~> 0.8.66)
58
- yajl-ruby (1.2.1)
60
+ yajl-ruby (1.3.0)
59
61
  zip-zip (0.3)
60
62
  rubyzip (>= 1.0.0)
61
63
 
@@ -56,7 +56,6 @@ s3:
56
56
 
57
57
  mysql:
58
58
  type: mysql
59
- #host: db-main-slave-free-001
60
59
  host: 127.0.0.1
61
60
  port: 9011
62
61
  database: main
@@ -64,6 +63,15 @@ mysql:
64
63
  password: <%= password 'mysql_shared_work_readonly' %>
65
64
  encoding: utf8
66
65
 
66
+ mysql_summary:
67
+ type: mysql
68
+ host: 127.0.0.1
69
+ port: 40201
70
+ database: summarized_data
71
+ username: work_readonly
72
+ password: <%= password 'mysql_shared_work_readonly' %>
73
+ encoding: utf8
74
+
67
75
  sqs_preproc:
68
76
  type: sqs
69
77
  url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/bricolage-preproc-dev"
@@ -0,0 +1,8 @@
1
+ create table item_pv_acc
2
+ ( data_date date encode lzo
3
+ , item_id integer encode lzo
4
+ , pv bigint encode lzo
5
+ , uu bigint encode lzo
6
+ )
7
+ sortkey (data_date, item_id)
8
+ ;
@@ -0,0 +1,40 @@
1
+ /*
2
+ class: my-import-delta
3
+ src-ds: mysql_summary
4
+ src-tables:
5
+ item_pv: summarized_data.daily_recipe_pvs
6
+ s3-ds: s3
7
+ s3-prefix: bricolage/my-import-delta/$data_date/data.
8
+ gzip: true
9
+ dump-options:
10
+ partition_column: id
11
+ delete_objects: true
12
+ dest-ds: sql
13
+ dest-table: $test_schema.item_pv_acc
14
+ delete-cond: "data_date = '$data_date'"
15
+ options:
16
+ statupdate: false
17
+ compupdate: false
18
+ maxerror: 0
19
+ #acceptinvchars: " "
20
+ #trimblanks: true
21
+ #truncatecolumns: true
22
+ ## datetime
23
+ #acceptanydate: true
24
+ #dateformat: "auto"
25
+ #timeformat: "auto"
26
+ #vacuum-sort: true
27
+ analyze: true
28
+ */
29
+
30
+ select
31
+ coalesce(`date`) as data_date
32
+ , coalesce(recipe_id) as item_id
33
+ , coalesce(page_view) as pv
34
+ , coalesce(unique_ip) as uu
35
+ from
36
+ $item_pv
37
+ where
38
+ `date` = date '$data_date'
39
+ and (@PARTITION_CONDITION@)
40
+ ;
@@ -1,3 +1,4 @@
1
+ data_date: "<%= (Date.today - 1).strftime('%Y-%m-%d') %>"
1
2
  defaults:
2
3
  analyze: true
3
4
  grant:
@@ -0,0 +1,40 @@
1
+ require 'test/unit'
2
+ require 'mocha/test_unit'
3
+ require 'bricolage/s3datasource'
4
+ require 'pp'
5
+ require 'aws-sdk'
6
+
7
+ module Bricolage
8
+ class TestS3datasouce < Test::Unit::TestCase
9
+
10
+ def setup
11
+ @ds = S3DataSource.new
12
+ @ds.stubs(:logger).returns(Logger.new)
13
+ end
14
+
15
+ test "traverse without exception" do
16
+ bucket = mock()
17
+ bucket.stubs(:objects).returns(true)
18
+ @ds.stubs(:bucket).returns(bucket)
19
+ assert @ds.traverse(nil)
20
+ end
21
+
22
+ test "traverse with 2 exception" do
23
+ bucket = mock()
24
+ bucket.stubs(:objects).raises(Aws::Xml::Parser::ParsingError.new("test message","0","test column")).then.
25
+ raises(Aws::Xml::Parser::ParsingError.new("test message","0","test column")).then.returns(true)
26
+ @ds.stubs(:bucket).returns(bucket)
27
+ assert @ds.traverse(nil)
28
+ end
29
+
30
+ test "traverse with more than 3 exception" do
31
+ bucket = mock()
32
+ bucket.stubs(:objects).raises(Aws::Xml::Parser::ParsingError.new("test message","0","test column")).then.
33
+ raises(Aws::Xml::Parser::ParsingError.new("test message","0","test column")).then.
34
+ raises(Aws::Xml::Parser::ParsingError.new("test message","0","test column")).then.
35
+ raises(Aws::Xml::Parser::ParsingError.new("test message","0","test column")).then.returns(true)
36
+ @ds.stubs(:bucket).returns(bucket)
37
+ assert_raise(Aws::Xml::Parser::ParsingError) {@ds.traverse(nil)}
38
+ end
39
+ end
40
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bricolage
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.20.0
4
+ version: 5.20.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-11 00:00:00.000000000 Z
11
+ date: 2016-12-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pg
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: mocha
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
125
139
  description: Redshift-oriented Data Warehouse Batch Framework
126
140
  email: aamine@loveruby.net
127
141
  executables:
@@ -140,6 +154,7 @@ files:
140
154
  - jobclass/insert.rb
141
155
  - jobclass/load.rb
142
156
  - jobclass/my-export.rb
157
+ - jobclass/my-import-delta.rb
143
158
  - jobclass/my-import.rb
144
159
  - jobclass/my-migrate.rb
145
160
  - jobclass/noop.rb
@@ -210,6 +225,7 @@ files:
210
225
  - test/home/revert.sh
211
226
  - test/home/subsys/d.ct
212
227
  - test/home/subsys/insert.sql.job
228
+ - test/home/subsys/item_pv_acc.ct
213
229
  - test/home/subsys/job1.job
214
230
  - test/home/subsys/job2.job
215
231
  - test/home/subsys/job3.job
@@ -221,6 +237,7 @@ files:
221
237
  - test/home/subsys/load_test.ct
222
238
  - test/home/subsys/load_test.job
223
239
  - test/home/subsys/migrate.job
240
+ - test/home/subsys/my-import-delta.sql.job
224
241
  - test/home/subsys/my-import.job
225
242
  - test/home/subsys/net1.jobnet
226
243
  - test/home/subsys/net2.jobnet
@@ -241,6 +258,7 @@ files:
241
258
  - test/test_c_streaming_load.rb
242
259
  - test/test_filesystem.rb
243
260
  - test/test_parameters.rb
261
+ - test/test_s3datasource.rb
244
262
  - test/test_variables.rb
245
263
  - test/vacuum-test.rb
246
264
  homepage: https://github.com/aamine/bricolage