bricolage 5.25.1 → 5.26.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: b26f9e8d81a62e099e26e9c7f0bf3394c10ff754
- data.tar.gz: d2201749ca5fc7fee832116ab88071e5a65502f0
+ metadata.gz: 7331a52e0d7ce7f6e37a033455c4bbd0cd20ec98
+ data.tar.gz: 8c794e3df428209e2467e8edd7dca371d109eb17
  SHA512:
- metadata.gz: 40f1006c34882e1718445aaa26eb1bb0e3695eb9763b325c116132636a0c1b4aba57a7bb8112162c5a02bf643c56e3cc4eb6622a876717118b7d05e2a4e8c77e
- data.tar.gz: 8e62c714a483670c1307e4b7eda9bd3bfaf746c44232aa4d6437034166e32ffa6d3ae5349d981a9a280587875fb1cdbe5d68c4ee97943287f27da5c31570153f
+ metadata.gz: 8f58e1983098afd0dace568e5f32e415e12e50e39be6a04502ef9cab9347c1325370c4304de4859d481d2b1ad5246aeaf816bc7971ca7f5adb0747f1af47ad19
+ data.tar.gz: c9831a9e1c8f5694654e69ed695f8886db98b3fe46e9a8ef7384a9f120bd8890e2557a14f90318665f0ad713cb087b6ce1ae103e9943c55aef98eb71ac684ff8
data/lib/bricolage/version.rb CHANGED
@@ -1,4 +1,4 @@
  module Bricolage
  APPLICATION_NAME = 'Bricolage'
- VERSION = '5.25.1'
+ VERSION = '5.26.0'
  end
data/test/home/Gemfile.lock CHANGED
@@ -1,12 +1,24 @@
+ PATH
+ remote: ../../../bricolage-mysql
+ specs:
+ bricolage-mysql (5.26.0)
+ bricolage (>= 5.26.0)
+ mysql2
+
+ PATH
+ remote: ../../../bricolage-redis
+ specs:
+ bricolage-redis (5.26.0)
+ bricolage (>= 5.26.0)
+ redis (~> 3)
+
  PATH
  remote: ../..
  specs:
- bricolage (5.25.0)
+ bricolage (5.26.0)
  aws-sdk-s3 (~> 1)
  aws-sdk-sns (~> 1)
- mysql2
  pg (~> 0.18.0)
- redis (>= 3.0.0)

  GEM
  remote: https://rubygems.org/
@@ -35,13 +47,15 @@ GEM
  pry (0.11.3)
  coderay (~> 1.1.0)
  method_source (~> 0.9.0)
- redis (4.0.1)
+ redis (3.3.3)

  PLATFORMS
  ruby

  DEPENDENCIES
  bricolage!
+ bricolage-mysql!
+ bricolage-redis!
  pry

  BUNDLED WITH
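Note: this release splits the MySQL and Redis adapters out of the core gem; the lockfile above picks them up from local paths only for the gem's own test suite. A minimal Gemfile sketch for an application consuming the split gems from RubyGems, assuming the plugin gems are published under the names shown in the specs above:

    # Gemfile (illustrative sketch; gem names and the >= 5.26.0 constraint taken from the lockfile above)
    source 'https://rubygems.org'

    gem 'bricolage', '>= 5.26.0'
    gem 'bricolage-mysql'   # MySQL data source and my-* job classes, formerly bundled in bricolage
    gem 'bricolage-redis'   # Redis data source and redis-export job class, formerly bundled in bricolage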
data/test/home/config/development/database.yml CHANGED
@@ -79,29 +79,16 @@ mysql_summary:
  password: <%= password 'mysql_shared_work_readonly' %>
  encoding: utf8

- sqs_preproc:
- type: sqs
- url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/bricolage-preproc-dev"
- max_number_of_messages: 10
- visibility_timeout: 60
-
- sqs_dispatch:
- type: sqs
- url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/log-stream-dev"
- max_number_of_messages: 10
- visibility_timeout: 60
-
- sqs_task:
- type: sqs
- url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/bricolage-load-tasks-dev"
- max_number_of_messages: 1
- visibility_timeout: 180
-
  sns:
  type: sns
  region: "ap-northeast-1"
  topic_arn: "arn:aws:sns:ap-northeast-1:789035092620:dwh-service-notification"

+ redis:
+ type: redis
+ host: localhost
+ port: 6379
+
  td:
  type: td
  database: logs
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: bricolage
  version: !ruby/object:Gem::Version
- version: 5.25.1
+ version: 5.26.0
  platform: ruby
  authors:
  - Minero Aoki
@@ -52,34 +52,6 @@ dependencies:
  - - "~>"
  - !ruby/object:Gem::Version
  version: '1'
- - !ruby/object:Gem::Dependency
- name: mysql2
- requirement: !ruby/object:Gem::Requirement
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- version: '0'
- type: :runtime
- prerelease: false
- version_requirements: !ruby/object:Gem::Requirement
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- version: '0'
- - !ruby/object:Gem::Dependency
- name: redis
- requirement: !ruby/object:Gem::Requirement
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- version: 3.0.0
- type: :runtime
- prerelease: false
- version_requirements: !ruby/object:Gem::Requirement
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- version: 3.0.0
  - !ruby/object:Gem::Dependency
  name: test-unit
  requirement: !ruby/object:Gem::Requirement
@@ -154,14 +126,9 @@ files:
  - jobclass/insert-delta.rb
  - jobclass/insert.rb
  - jobclass/load.rb
- - jobclass/my-export.rb
- - jobclass/my-import-delta.rb
- - jobclass/my-import.rb
- - jobclass/my-migrate.rb
  - jobclass/noop.rb
  - jobclass/rebuild-drop.rb
  - jobclass/rebuild-rename.rb
- - jobclass/redis-export.rb
  - jobclass/s3-put.rb
  - jobclass/sql.rb
  - jobclass/streaming_load.rb
@@ -190,11 +157,9 @@ files:
  - lib/bricolage/logger.rb
  - lib/bricolage/loglocator.rb
  - lib/bricolage/loglocatorbuilder.rb
- - lib/bricolage/mysqldatasource.rb
  - lib/bricolage/parameters.rb
  - lib/bricolage/postgresconnection.rb
  - lib/bricolage/psqldatasource.rb
- - lib/bricolage/redisdatasource.rb
  - lib/bricolage/resource.rb
  - lib/bricolage/rubyjobclass.rb
  - lib/bricolage/s3datasource.rb
@@ -208,10 +173,6 @@ files:
  - lib/bricolage/variables.rb
  - lib/bricolage/version.rb
  - libexec/create-lockfile
- - libexec/mys3dump.jar
- - libexec/sqldump
- - libexec/sqldump.Darwin
- - libexec/sqldump.Linux
  - test/all.rb
  - test/home/Gemfile
  - test/home/Gemfile.lock
@@ -219,7 +180,6 @@ files:
  - test/home/config/development/database.yml
  - test/home/config/development/password.yml
  - test/home/config/development/variable.yml
- - test/home/config/streamingload.yml
  - test/home/data/20141002-1355_00.txt
  - test/home/data/20141002-1355_01.txt
  - test/home/data/20141002-1355_02.txt
data/jobclass/my-export.rb DELETED
@@ -1,40 +0,0 @@
- JobClass.define('my-export') {
- parameters {|params|
- params.add SQLFileParam.new(optional: true)
- params.add DestFileParam.new
- params.add SrcTableParam.new
- params.add EnumParam.new('format', %w(json tsv csv), 'Target file format.', default: 'json')
- params.add OptionalBoolParam.new('gzip', 'If true, compresses target file by gzip.')
- params.add OptionalBoolParam.new('override', 'If true, clears target file. Otherwise causes error.')
- params.add OptionalBoolParam.new('sqldump', 'If true, clears use sqldump command to dump, only wheen usable.')
- params.add DataSourceParam.new('mysql')
- }
-
- declarations {|params|
- sql_statement(params).declarations
- }
-
- script {|params, script|
- script.task(params['data-source']) {|task|
- task.export sql_statement(params),
- path: params['dest-file'],
- format: params['format'],
- override: params['override'],
- gzip: params['gzip'],
- sqldump: params['sqldump']
- }
- }
-
- def sql_statement(params)
- if sql = params['sql-file']
- sql
- else
- srcs = params['src-tables']
- raise ParameterError, "src-tables must be singleton when no sql-file is given" unless srcs.size == 1
- src_table_var = srcs.keys.first
- stmt = SQLStatement.for_string("select * from $#{src_table_var};")
- stmt.declarations = Declarations.new({src_table_var => src_table_var})
- stmt
- end
- end
- }
data/jobclass/my-import-delta.rb DELETED
@@ -1,66 +0,0 @@
- require 'bricolage/psqldatasource'
- require 'bricolage/mysqldatasource'
-
- JobClass.define('my-import-delta') {
- parameters {|params|
- # S3Export
- params.add SrcTableParam.new(optional: false)
- params.add DataSourceParam.new('mysql', 'src-ds', 'Source data source.')
- params.add SQLFileParam.new(optional: true)
- params.add DataSourceParam.new('s3', 's3-ds', 'Temporary file storage.')
- params.add DestFileParam.new('s3-prefix', 'PREFIX', 'Temporary S3 prefix.')
- params.add KeyValuePairsParam.new('dump-options', 'KEY:VALUE', 'dump options.', optional: true)
-
- # Delete, Load
- params.add DataSourceParam.new('sql', 'dest-ds', 'Destination data source.')
- params.add StringParam.new('delete-cond', 'SQL_EXPR', 'DELETE condition.')
- params.add DestTableParam.new(optional: false)
- params.add KeyValuePairsParam.new('options', 'OPTIONS', 'Loader options.',
- optional: true, default: PSQLLoadOptions.new,
- value_handler: lambda {|value, ctx, vars| PSQLLoadOptions.parse(value) })
-
- # Misc
- params.add OptionalBoolParam.new('analyze', 'ANALYZE table after SQL is executed.', default: true)
- params.add OptionalBoolParam.new('vacuum', 'VACUUM table after SQL is executed.')
- params.add OptionalBoolParam.new('vacuum-sort', 'VACUUM SORT table after SQL is executed.')
-
- # All
- params.add OptionalBoolParam.new('export', 'Runs EXPORT task.')
- params.add OptionalBoolParam.new('load', 'Runs LOAD task.')
- params.add OptionalBoolParam.new('gzip', 'Compress Temporary files.')
- }
-
- script {|params, script|
- run_all = !params['export'] && !params['load']
-
- # S3Export
- if params['export'] || run_all
- script.task(params['src-ds']) {|task|
- task.s3export params['src-tables'].values.first.to_s,
- params['sql-file'],
- params['s3-ds'],
- params['s3-prefix'],
- params['gzip'],
- dump_options: params['dump-options']
- }
- end
-
- # Load
- if params['load'] || run_all
- script.task(params['dest-ds']) {|task|
- task.transaction {
- # DELETE
- task.exec SQLStatement.delete_where(params['delete-cond']) if params['delete-cond']
-
- # COPY
- task.load params['s3-ds'], params['s3-prefix'], params['dest-table'],
- 'json', nil, params['options'].merge('gzip' => params['gzip'])
- }
-
- # VACUUM, ANALYZE
- task.vacuum_if params['vacuum'], params['vacuum-sort'], params['dest-table']
- task.analyze_if params['analyze'], params['dest-table']
- }
- end
- }
- }
data/jobclass/my-import.rb DELETED
@@ -1,84 +0,0 @@
- require 'bricolage/psqldatasource'
- require 'bricolage/mysqldatasource'
-
- JobClass.define('my-import') {
- parameters {|params|
- # S3Export
- params.add SrcTableParam.new(optional: false)
- params.add DataSourceParam.new('mysql', 'src-ds', 'Source data source.')
- params.add SQLFileParam.new(optional: true)
- params.add DataSourceParam.new('s3', 's3-ds', 'Temporary file storage.')
- params.add DestFileParam.new('s3-prefix', 'PREFIX', 'Temporary S3 prefix.')
- params.add KeyValuePairsParam.new('dump-options', 'KEY:VALUE', 'dump options.', optional: true)
-
- # Load
- params.add DestTableParam.new(optional: false)
- params.add DataSourceParam.new('sql', 'dest-ds', 'Destination data source.')
- params.add KeyValuePairsParam.new('options', 'OPTIONS', 'Loader options.',
- optional: true, default: PSQLLoadOptions.new,
- value_handler: lambda {|value, ctx, vars| PSQLLoadOptions.parse(value) })
- params.add SQLFileParam.new('table-def', 'PATH', 'Create table file.')
- params.add OptionalBoolParam.new('no-backup', 'Do not backup current table with suffix "_old".', default: false)
-
- # Misc
- params.add OptionalBoolParam.new('analyze', 'ANALYZE table after SQL is executed.', default: true)
- params.add OptionalBoolParam.new('vacuum', 'VACUUM table after SQL is executed.')
- params.add OptionalBoolParam.new('vacuum-sort', 'VACUUM SORT table after SQL is executed.')
- params.add KeyValuePairsParam.new('grant', 'KEY:VALUE', 'GRANT table after SQL is executed. (required keys: privilege, to)')
-
- # All
- params.add OptionalBoolParam.new('export', 'Runs EXPORT task.')
- params.add OptionalBoolParam.new('put', 'Runs PUT task.')
- params.add OptionalBoolParam.new('load', 'Runs LOAD task.')
- params.add OptionalBoolParam.new('gzip', 'Compress Temporary files.')
- }
-
- script {|params, script|
- run_all = !params['export'] && !params['put'] && !params['load']
-
- # S3Export
- if params['export'] || run_all
- script.task(params['src-ds']) {|task|
- task.s3export params['src-tables'].keys.first,
- params['sql-file'],
- params['s3-ds'],
- params['s3-prefix'],
- params['gzip'],
- dump_options: params['dump-options']
- }
- end
-
- # Load
- if params['load'] || run_all
- script.task(params['dest-ds']) {|task|
- prev_table = '${dest_table}_old'
- work_table = '${dest_table}_wk'
-
- task.transaction {
- # CREATE
- task.drop_force prev_table
- task.drop_force work_table
- task.exec params['table-def'].replace(/\$\{?dest_table\}?\b/, work_table)
-
- # COPY
- task.load params['s3-ds'], params['s3-prefix'], work_table,
- 'json', nil, params['options'].merge('gzip' => params['gzip'])
-
- # GRANT, ANALYZE
- task.grant_if params['grant'], work_table
- task.analyze_if params['analyze'], work_table
-
- # RENAME
- task.create_dummy_table '${dest_table}'
- task.rename_table params['dest-table'].to_s, "#{params['dest-table'].name}_old"
- task.rename_table work_table, params['dest-table'].name
- }
-
- task.drop_force prev_table if params['no-backup']
-
- # VACUUM: vacuum is needless for newly created table, applying vacuum after exposure is not a problem.
- task.vacuum_if params['vacuum'], params['vacuum-sort'], params['dest-table'].to_s
- }
- end
- }
- }
data/jobclass/my-migrate.rb DELETED
@@ -1,116 +0,0 @@
- require 'bricolage/psqldatasource'
-
- JobClass.define('my-migrate') {
- parameters {|params|
- # Export
- params.add SrcTableParam.new(optional: false)
- params.add DataSourceParam.new('mysql', 'src-ds', 'Source data source.')
- params.add DestFileParam.new('tmp-file', 'PATH', 'Temporary local file path.')
- params.add OptionalBoolParam.new('sqldump', 'If true, use sqldump command to dump, only on available.', default: true)
- params.add SQLFileParam.new(optional: true)
-
- # Put
- params.add DestFileParam.new('s3-file', 'PATH', 'Temporary S3 file path.')
- params.add DataSourceParam.new('s3', 's3-ds', 'Temporary file storage.')
- params.add OptionalBoolParam.new('override', 'If true, overwrite s3 target file. Otherwise causes error.')
- params.add OptionalBoolParam.new('remove-tmp', 'Removes temporary local files after S3-PUT is succeeded.')
-
- # Load
- params.add DestTableParam.new(optional: false)
- params.add DataSourceParam.new('sql', 'dest-ds', 'Destination data source.')
- params.add KeyValuePairsParam.new('options', 'OPTIONS', 'Loader options.',
- optional: true, default: PSQLLoadOptions.new,
- value_handler: lambda {|value, ctx, vars| PSQLLoadOptions.parse(value) })
- params.add SQLFileParam.new('table-def', 'PATH', 'Create table file.')
- params.add OptionalBoolParam.new('no-backup', 'Do not backup current table with suffix "_old".', default: false)
-
- # Misc
- params.add OptionalBoolParam.new('analyze', 'ANALYZE table after SQL is executed.', default: true)
- params.add OptionalBoolParam.new('vacuum', 'VACUUM table after SQL is executed.')
- params.add OptionalBoolParam.new('vacuum-sort', 'VACUUM SORT table after SQL is executed.')
- params.add KeyValuePairsParam.new('grant', 'KEY:VALUE', 'GRANT table after SQL is executed. (required keys: privilege, to)')
-
- # All
- params.add OptionalBoolParam.new('export', 'Runs EXPORT task.')
- params.add OptionalBoolParam.new('put', 'Runs PUT task.')
- params.add OptionalBoolParam.new('load', 'Runs LOAD task.')
- params.add OptionalBoolParam.new('gzip', 'If true, compresses target file by gzip.', default: true)
- }
-
- declarations {|params|
- decls = sql_statement(params).declarations
- decls.declare 'dest-table', nil
- decls
- }
-
- script {|params, script|
- run_all = !params['export'] && !params['put'] && !params['load']
-
- # Export
- if params['export'] || run_all
- script.task(params['src-ds']) {|task|
- task.export sql_statement(params),
- path: params['tmp-file'],
- format: 'json',
- override: true,
- gzip: params['gzip'],
- sqldump: params['sqldump']
- }
- end
-
- # Put
- if params['put'] || run_all
- script.task(params['s3-ds']) {|task|
- task.put params['tmp-file'], params['s3-file'], check_args: false
- }
- if params['remove-tmp']
- script.task(params.file_ds) {|task|
- task.remove params['tmp-file']
- }
- end
- end
-
- # Load
- if params['load'] || run_all
- script.task(params['dest-ds']) {|task|
- prev_table = '${dest_table}_old'
- work_table = '${dest_table}_wk'
-
- task.transaction {
- # CREATE
- task.drop_force prev_table
- task.drop_force work_table
- task.exec params['table-def'].replace(/\$\{?dest_table\}?\b/, work_table)
-
- # COPY
- task.load params['s3-ds'], params['s3-file'], work_table,
- 'json', nil, params['options'].merge('gzip' => params['gzip'])
-
- # GRANT, ANALYZE
- task.grant_if params['grant'], work_table
- task.analyze_if params['analyze'], work_table
-
- # RENAME
- task.create_dummy_table '${dest_table}'
- task.rename_table params['dest-table'].to_s, "#{params['dest-table'].name}_old"
- task.rename_table work_table, params['dest-table'].name
- }
-
- task.drop_force prev_table if params['no-backup']
-
- # VACUUM: vacuum is needless for newly created table, applying vacuum after exposure is not a problem.
- task.vacuum_if params['vacuum'], params['vacuum-sort'], params['dest-table'].to_s
- }
- end
- }
-
- def sql_statement(params)
- return params['sql-file'] if params['sql-file']
- srcs = params['src-tables']
- raise ParameterError, "src-tables must be singleton when no sql-file is given" unless srcs.size == 1
- src_table_var = srcs.keys.first
- stmt = SQLStatement.for_string("select * from $#{src_table_var};")
- stmt.declarations = Declarations.new({src_table_var => src_table_var})
- stmt
- end
- }
data/jobclass/redis-export.rb DELETED
@@ -1,42 +0,0 @@
- require 'bricolage/psqldatasource'
- require 'bricolage/redisdatasource'
- require 'redis'
-
- JobClass.define('redis-export') {
- parameters {|params|
- # Export
- params.add DataSourceParam.new('psql', 'src-ds', 'Source data source.')
- params.add SrcTableParam.new(optional: false)
- params.add SQLFileParam.new(optional: true)
-
- # Redis import
- params.add DataSourceParam.new('redis', 'dest-ds', 'Redis cluster')
- params.add StringParam.new('key-column', 'REDIS_KEY', 'Redis object key. default: id', optional: true)
- params.add StringParam.new('prefix', 'REDIS_PREFIX', 'Redis object key prefix', optional: true)
- params.add StringParam.new('encode', 'REDIS_ENCODE', 'Redis object encoding. default: hash', optional: true)
- params.add StringParam.new('expire', 'REDIS_TTL', 'Redis object TTL. default: none', optional: true)
- }
-
- script {|params, script|
- # Export
- script.task(params['dest-ds']) {|task|
- task.import params['src-ds'],
- params['src-tables'].first,
- sql_statement(params),
- params['key-column'] || "id",
- params['prefix'],
- params['encode'] || "hash",
- expire: params['expire'].to_i
- }
- }
-
- def sql_statement(params)
- return params['sql-file'] if params['sql-file']
- srcs = params['src-tables']
- raise ParameterError, "src-tables must be singleton when no sql-file is given" unless srcs.size == 1
- src_table_var = srcs.keys.first
- stmt = SQLStatement.for_string("select * from $#{src_table_var};")
- stmt.declarations = Declarations.new({src_table_var => src_table_var})
- stmt
- end
- }
data/lib/bricolage/mysqldatasource.rb DELETED
@@ -1,363 +0,0 @@
- require 'bricolage/datasource'
- require 'mysql2'
- require 'json'
- require 'csv'
- require 'stringio'
- require 'open3'
-
- module Bricolage
-
- class MySQLDataSource < DataSource
- declare_type 'mysql'
-
- def initialize(**mysql_options)
- @mysql_options = mysql_options
- @client = nil
- end
-
- attr_reader :mysql_options
-
- def host
- @mysql_options[:host]
- end
-
- def port
- @mysql_options[:port]
- end
-
- def username
- @mysql_options[:username]
- end
-
- def password
- @mysql_options[:password]
- end
-
- def database
- @mysql_options[:database]
- end
-
- def new_task
- MySQLTask.new(self)
- end
-
- def open
- @client = Mysql2::Client.new(**@mysql_options)
- begin
- yield self
- ensure
- c = @client
- @client = nil
- c.close
- end
- end
-
- def query(sql, **opts)
- logger.info "[SQL] #{sql}"
- connection_check
- @client.query(sql, **opts)
- end
-
- private
-
- def connection_check
- unless @client
- raise FatalError, "#{self.class} used outside of \#open block"
- end
- end
- end
-
- class MySQLTask < DataSourceTask
- def export(stmt, path: nil, format: nil, override: false, gzip: false, sqldump: false)
- add Export.new(stmt, path: path, format: format, override: override, gzip: gzip, sqldump: sqldump)
- end
-
- class Export < Action
- def initialize(stmt, path: nil, format: nil, override: false, gzip: false, sqldump: false)
- @statement = stmt
- @path = path
- @format = format
- @override = override
- @gzip = gzip
- @sqldump = sqldump
- end
-
- def bind(*args)
- @statement.bind(*args)
- end
-
- def source
- @statement.stripped_source
- end
-
- def run
- if @sqldump and sqldump_available? and sqldump_usable?
- export_by_sqldump
- else
- export_by_ruby
- end
- JobResult.success
- end
-
- def export_by_sqldump
- cmds = [[{"SQLDUMP_PASSWORD" => ds.password}, sqldump_path.to_s, "--#{@format}", ds.host, ds.port.to_s, ds.username, ds.database, @statement.stripped_source]]
- cmds.push [GZIP_COMMAND] if @gzip
- cmds.last.push({out: @path.to_s})
- ds.logger.info '[CMD] ' + format_pipeline(cmds)
- statuses = Open3.pipeline(*cmds)
- statuses.each_with_index do |st, idx|
- unless st.success?
- cmd = cmds[idx].first
- raise JobFailure, "sqldump failed (status #{st.to_i})"
- end
- end
- end
-
- def format_pipeline(cmds)
- cmds = cmds.map {|args| args[0].kind_of?(Hash) ? args[1..-1] : args.dup } # do not show env
- cmds.map {|args| %Q("#{args.join('" "')}") }.join(' | ')
- end
-
- def sqldump_available?
- sqldump_real_path.executable?
- end
-
- def sqldump_path
- Pathname(__dir__).parent.parent + "libexec/sqldump"
- end
-
- def sqldump_real_path
- Pathname("#{sqldump_path}.#{platform_name}")
- end
-
- def platform_name
- @platform_name ||= `uname -s`.strip
- end
-
- def sqldump_usable?
- %w[json tsv].include?(@format)
- end
-
- def export_by_ruby
- ds.logger.info "exporting table into #{@path} ..."
- count = 0
- open_target_file(@path) {|f|
- writer_class = WRITER_CLASSES[@format] or raise ArgumentError, "unknown export format: #{@format.inspect}"
- writer = writer_class.new(f)
- rs = ds.query(@statement.stripped_source, as: writer_class.record_format, stream: true, cache_rows: false)
- ds.logger.info "got result set, writing..."
- rs.each do |values|
- writer.write_record values
- count += 1
- ds.logger.info "#{count} records exported..." if count % 10_0000 == 0
- end
- }
- ds.logger.info "#{count} records exported; export finished"
- end
-
- private
-
- # FIXME: parameterize
- GZIP_COMMAND = 'gzip'
-
- def open_target_file(path, &block)
- unless @override
- raise JobFailure, "destination file already exists: #{path}" if File.exist?(path)
- end
- if @gzip
- ds.logger.info "enable compression: gzip"
- IO.popen(%Q(#{GZIP_COMMAND} > "#{path}"), 'w', &block)
- else
- File.open(path, 'w', &block)
- end
- end
- end
-
- def s3export(table, stmt, s3ds, prefix, gzip, dump_options)
- options = dump_options.nil? ? {} : dump_options[:dump_options]
- add S3Export.new(table, stmt, s3ds, prefix, gzip: gzip,
- format: options['format'],
- partition_column: options['partition_column'],
- partition_number: options['partition_number'],
- write_concurrency: options['write_concurrency'],
- rotation_size: options['rotation_size'],
- delete_objects: options['delete_objects'],
- object_key_delimiter: options['object_key_delimiter'],
- src_zone_offset: options['src_zone_offset'],
- dst_zone_offset: options['dst_zone_offset'])
- end
-
- class S3Export < Action
-
- def initialize(table, stmt, s3ds, prefix, gzip: true,
- format: "json",
- partition_column: nil,
- partition_number: 4,
- write_concurrency: 4,
- rotation_size: nil,
- delete_objects: false,
- object_key_delimiter: nil,
- src_zone_offset: nil,
- dst_zone_offset: nil)
- @table = table
- @statement = stmt
- @s3ds = s3ds
- @prefix = build_prefix @s3ds.prefix, prefix
- @format = format
- @gzip = gzip
- @partition_column = partition_column
- @partition_number = partition_number
- @write_concurrency = write_concurrency
- @rotation_size = rotation_size
- @delete_objects = delete_objects
- @object_key_delimiter = object_key_delimiter
- @src_zone_offset = src_zone_offset
- @dst_zone_offset = dst_zone_offset
- end
-
- def run
- s3export
- JobResult.success
- end
-
- def bind(*args)
- @statement.bind(*args) if @statement
- end
-
- def source
- "-- myexport #{@table} -> #{@s3ds.bucket_name}/#{@prefix}" +
- (@statement ? "\n#{@statement.stripped_source}" : "")
- end
-
- def s3export
- cmd = build_cmd(command_parameters)
- ds.logger.info "[CMD] #{cmd}"
- out, st = Open3.capture2e(environment_variables, cmd)
- ds.logger.info "[CMDOUT] #{out}"
- unless st.success?
- msg = extract_exception_message(out)
- raise JobFailure, "mys3dump failed (status: #{st.to_i}): #{msg}"
- end
- end
-
- def environment_variables
- {
- 'AWS_ACCESS_KEY_ID' => @s3ds.access_key,
- 'AWS_SECRET_ACCESS_KEY' => @s3ds.secret_key,
- 'MYS3DUMP_PASSWORD' => ds.password
- }
- end
-
- def command_parameters
- params = {
- jar: mys3dump_path.to_s,
- h: ds.host,
- P: ds.port.to_s,
- D: ds.database,
- u: ds.username,
- #p: ds.password,
- o: connection_property,
- t: @table,
- b: @s3ds.bucket.name,
- x: @prefix
- }
- params[:q] = @statement.stripped_source.chomp(';') if @statement
- params[:f] = @format if @format
- params[:C] = nil if @gzip
- params[:c] = @partition_column if @partition_column
- params[:n] = @partition_number if @partition_number
- params[:w] = @write_concurrency if @write_concurrency
- params[:r] = @rotation_size if @rotation_size
- params[:d] = nil if @delete_objects
- params[:k] = @object_key_delimiter if @object_key_delimiter
- if src_zone_offset = @src_zone_offset || ds.mysql_options[:src_zone_offset]
- params[:S] = src_zone_offset
- end
- if dst_zone_offset = @dst_zone_offset || ds.mysql_options[:dst_zone_offset]
- params[:T] = dst_zone_offset
- end
- params
- end
-
- OPTION_MAP = {
- encoding: 'useUnicode=true&characterEncoding',
- read_timeout: 'netTimeoutForStreamingResults',
- connect_timeout: 'connectTimeout',
- reconnect: 'autoReconnect',
- collation: 'connectionCollation'
- }
-
- def connection_property
- ds.mysql_options.map {|k, v| opt = OPTION_MAP[k] ; opt ? "#{opt}=#{v}" : nil }.compact.join('&')
- end
-
- def build_prefix(ds_prefix, pm_prefix)
- ((ds_prefix || "") + "//" + (pm_prefix.to_s || "")).gsub(%r<\A/>, '').gsub(%r<//>, '/')
- end
-
- def mys3dump_path
- Pathname(__dir__).parent.parent + "libexec/mys3dump.jar"
- end
-
- def build_cmd(options)
- (['java'] + options.flat_map {|k, v| v ? ["-#{k}", v.to_s] : ["-#{k}"] }.map {|o| %Q("#{o}") }).join(" ")
- end
-
- def extract_exception_message(out)
- out.lines do |line|
- if /^.*Exception: (?<msg>.*)$/ =~ line
- return msg
- end
- end
- end
- end
-
- WRITER_CLASSES = {}
-
- class JSONWriter
- def JSONWriter.record_format
- :hash
- end
-
- def initialize(f)
- @f = f
- end
-
- def write_record(values)
- @f.puts JSON.dump(values)
- end
- end
- WRITER_CLASSES['json'] = JSONWriter
-
- class TSVWriter
- def TSVWriter.record_format
- :array
- end
-
- def initialize(f)
- @f = f
- end
-
- def write_record(values)
- @f.puts values.join("\t")
- end
- end
- WRITER_CLASSES['tsv'] = TSVWriter
-
- class CSVWriter
- def CSVWriter.record_format
- :array
- end
-
- def initialize(f)
- @csv = CSV.new(f)
- end
-
- def write_record(values)
- @csv.add_row values
- end
- end
- WRITER_CLASSES['csv'] = CSVWriter
- end
-
- end
data/lib/bricolage/redisdatasource.rb DELETED
@@ -1,153 +0,0 @@
- require 'bricolage/datasource'
- require 'bricolage/commandutils'
- require 'redis'
- require 'json'
-
- module Bricolage
-
- class RedisDataSource < DataSource
- declare_type 'redis'
-
- def initialize(host: 'localhost', port: 6380, **options)
- @host = host
- @port = port
- @options = options
- end
-
- attr_reader :host
- attr_reader :port
- attr_reader :options
-
- def new_task
- RedisTask.new(self)
- end
-
- def open
- client = Redis.new(host: @host, port: @port, **@options)
- yield client
- end
- end
-
- class RedisTask < DataSourceTask
- def import(src, table, query, key_column, prefix, encode, expire: nil)
- add Import.new(src, table, query, key_column, prefix, encode, expire)
- end
-
- class Import < Action
- def initialize(src, table, query, key_column, prefix, encode, expire)
- @src = src
- @table = table
- @query = query
- @key_columns = key_column.split(',').map(&:strip)
- @prefix = prefix || "#{@table.last.schema}_#{@table.last.name}_"
- @encode = encode
- @expire = expire
- end
-
- def bind(*args)
- @query.bind(*args)
- end
-
- def source
- @query.stripped_source
- end
-
- def run
- logger = ds.logger
- begin
- logger.info "Key Pattern: #{@prefix}<#{@key_columns.join('_')}>"
- logger.info "Encode: #{@encode}"
- logger.info "Expire: #{@expire}"
- ds.open {|client|
- writer = RedisRowWriter.for_encode(@encode).new(client, @prefix, @key_columns)
- import writer
- }
- rescue => ex
- logger.exception ex
- raise JobFailure, ex.message
- end
- JobResult.success
- end
-
- BATCH_SIZE = 5000
-
- def import(writer)
- count = 0
- @src.query_batch(source, BATCH_SIZE) do |rs|
- writer.pipelined {
- rs.each do |row|
- writer.write(row)
- count += 1
- ds.logger.info "transfered: #{count} rows" if count % 100_0000 == 0
- end
- }
- end
- ds.logger.info "all rows written: #{count} rows"
- end
- end
- end
-
- class RedisRowWriter
- def RedisRowWriter.for_encode(encode)
- case encode
- when 'hash' then RedisHashRowWriter
- when 'json' then RedisJSONRowWriter
- else
- raise ParameterError, "unsupported Redis encode: #{encode.inspect}"
- end
- end
-
- def initialize(client, prefix, key_columns)
- @client = client
- @prefix = prefix
- @key_columns = key_columns
- end
-
- attr_reader :prefix
- attr_reader :write_count
-
- def key(row)
- @prefix + @key_columns.map {|k| row[k] }.join('_')
- end
-
- def value_columns(row)
- r = row.dup
- @key_columns.each do |key|
- r.delete(key)
- end
- r.empty? ? {1 => 1} : r
- end
-
- def pipelined(&block)
- @client.pipelined(&block)
- end
-
- def write(row)
- key = key(row)
- futures = do_write(key, value_columns(row))
- futures.push @client.expire(key, @expire) if @expire
- futures
- end
-
- def expire
- @client.expire(key, @expire)
- end
- end
-
- class RedisHashRowWriter < RedisRowWriter
- def do_write(key, values)
- # set a value for each key:field pair
- values.map {|field, value|
- @client.hset(key, field, value)
- }
- end
- end
-
- class RedisJSONRowWriter < RedisRowWriter
- def do_write(key, values)
- future = @client.set(key, JSON.generate(values))
- [future]
- end
- end
-
- end
data/libexec/mys3dump.jar DELETED
Binary file
data/libexec/sqldump DELETED
@@ -1,9 +0,0 @@
- #!/bin/sh
-
- binary="$0.$(uname -s)"
- if ! [[ -x $binary ]]
- then
- echo "$0: error: sqldump does not support $(uname -s)" 1>&2
- exit 1
- fi
- exec "$binary" "$@"
data/libexec/sqldump.Darwin DELETED
Binary file
data/libexec/sqldump.Linux DELETED
Binary file
data/test/home/config/streamingload.yml DELETED
@@ -1,8 +0,0 @@
- redshift-ds: sql
- event-queue-ds: sqs_strload_event
- task-queue-ds: sqs_strload_task
- url_patterns:
- -
- url: "s3://redshift-copy-buffer/development/log_stream/\\d{8}_\\d{4}_\\d+.*\\.gz"
- schema: "aamine"
- table: "pv_log"