bricolage 5.25.1 → 5.26.0

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: b26f9e8d81a62e099e26e9c7f0bf3394c10ff754
-   data.tar.gz: d2201749ca5fc7fee832116ab88071e5a65502f0
+   metadata.gz: 7331a52e0d7ce7f6e37a033455c4bbd0cd20ec98
+   data.tar.gz: 8c794e3df428209e2467e8edd7dca371d109eb17
  SHA512:
-   metadata.gz: 40f1006c34882e1718445aaa26eb1bb0e3695eb9763b325c116132636a0c1b4aba57a7bb8112162c5a02bf643c56e3cc4eb6622a876717118b7d05e2a4e8c77e
-   data.tar.gz: 8e62c714a483670c1307e4b7eda9bd3bfaf746c44232aa4d6437034166e32ffa6d3ae5349d981a9a280587875fb1cdbe5d68c4ee97943287f27da5c31570153f
+   metadata.gz: 8f58e1983098afd0dace568e5f32e415e12e50e39be6a04502ef9cab9347c1325370c4304de4859d481d2b1ad5246aeaf816bc7971ca7f5adb0747f1af47ad19
+   data.tar.gz: c9831a9e1c8f5694654e69ed695f8886db98b3fe46e9a8ef7384a9f120bd8890e2557a14f90318665f0ad713cb087b6ce1ae103e9943c55aef98eb71ac684ff8
data/lib/bricolage/version.rb CHANGED
@@ -1,4 +1,4 @@
  module Bricolage
    APPLICATION_NAME = 'Bricolage'
-   VERSION = '5.25.1'
+   VERSION = '5.26.0'
  end
data/test/home/Gemfile.lock CHANGED
@@ -1,12 +1,24 @@
+ PATH
+   remote: ../../../bricolage-mysql
+   specs:
+     bricolage-mysql (5.26.0)
+       bricolage (>= 5.26.0)
+       mysql2
+
+ PATH
+   remote: ../../../bricolage-redis
+   specs:
+     bricolage-redis (5.26.0)
+       bricolage (>= 5.26.0)
+       redis (~> 3)
+
  PATH
    remote: ../..
    specs:
-     bricolage (5.25.0)
+     bricolage (5.26.0)
        aws-sdk-s3 (~> 1)
        aws-sdk-sns (~> 1)
-       mysql2
        pg (~> 0.18.0)
-       redis (>= 3.0.0)

  GEM
    remote: https://rubygems.org/
@@ -35,13 +47,15 @@ GEM
      pry (0.11.3)
        coderay (~> 1.1.0)
        method_source (~> 0.9.0)
-     redis (4.0.1)
+     redis (3.3.3)

  PLATFORMS
    ruby

  DEPENDENCIES
    bricolage!
+   bricolage-mysql!
+   bricolage-redis!
    pry

  BUNDLED WITH
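The lockfile change above is the visible edge of this release's main change: MySQL and Redis support moved out of the core gem into the separate plugin gems bricolage-mysql and bricolage-redis (resolved from local paths here only because this lockfile is the gem's own test fixture). A minimal Gemfile sketch for an application that wants both backends after the split, assuming the plugin gems are published alongside this release and are the new home of the removed data sources and job classes:

    # Gemfile (hypothetical application; gem availability on rubygems.org is an assumption)
    source 'https://rubygems.org'

    gem 'bricolage', '~> 5.26'
    gem 'bricolage-mysql'   # assumed new home of the mysql data source and my-* job classes
    gem 'bricolage-redis'   # assumed new home of the redis data source and redis-export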
data/test/home/config/development/database.yml CHANGED
@@ -79,29 +79,16 @@ mysql_summary:
    password: <%= password 'mysql_shared_work_readonly' %>
    encoding: utf8

- sqs_preproc:
-   type: sqs
-   url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/bricolage-preproc-dev"
-   max_number_of_messages: 10
-   visibility_timeout: 60
-
- sqs_dispatch:
-   type: sqs
-   url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/log-stream-dev"
-   max_number_of_messages: 10
-   visibility_timeout: 60
-
- sqs_task:
-   type: sqs
-   url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/bricolage-load-tasks-dev"
-   max_number_of_messages: 1
-   visibility_timeout: 180
-
  sns:
    type: sns
    region: "ap-northeast-1"
    topic_arn: "arn:aws:sns:ap-northeast-1:789035092620:dwh-service-notification"

+ redis:
+   type: redis
+   host: localhost
+   port: 6379
+
  td:
    type: td
    database: logs
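This config change pairs with the code moves elsewhere in the diff: the SQS queue entries are dropped, and the new redis entry targets the redis data source that now lives outside the core gem. A minimal sketch of what such an entry maps to, based on the RedisDataSource implementation removed later in this diff (the require path under bricolage-redis is an assumption):

    # Sketch: a database.yml entry of type 'redis' supplies keyword arguments to
    # RedisDataSource; #open yields a raw Redis client (see the removed
    # lib/bricolage/redisdatasource.rb below).
    require 'bricolage/redisdatasource'   # assumed path inside the bricolage-redis gem

    ds = Bricolage::RedisDataSource.new(host: 'localhost', port: 6379)
    ds.open {|client| client.ping }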
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: bricolage
  version: !ruby/object:Gem::Version
-   version: 5.25.1
+   version: 5.26.0
  platform: ruby
  authors:
  - Minero Aoki
@@ -52,34 +52,6 @@ dependencies:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1'
- - !ruby/object:Gem::Dependency
-   name: mysql2
-   requirement: !ruby/object:Gem::Requirement
-     requirements:
-     - - ">="
-       - !ruby/object:Gem::Version
-         version: '0'
-   type: :runtime
-   prerelease: false
-   version_requirements: !ruby/object:Gem::Requirement
-     requirements:
-     - - ">="
-       - !ruby/object:Gem::Version
-         version: '0'
- - !ruby/object:Gem::Dependency
-   name: redis
-   requirement: !ruby/object:Gem::Requirement
-     requirements:
-     - - ">="
-       - !ruby/object:Gem::Version
-         version: 3.0.0
-   type: :runtime
-   prerelease: false
-   version_requirements: !ruby/object:Gem::Requirement
-     requirements:
-     - - ">="
-       - !ruby/object:Gem::Version
-         version: 3.0.0
  - !ruby/object:Gem::Dependency
    name: test-unit
    requirement: !ruby/object:Gem::Requirement
@@ -154,14 +126,9 @@ files:
  - jobclass/insert-delta.rb
  - jobclass/insert.rb
  - jobclass/load.rb
- - jobclass/my-export.rb
- - jobclass/my-import-delta.rb
- - jobclass/my-import.rb
- - jobclass/my-migrate.rb
  - jobclass/noop.rb
  - jobclass/rebuild-drop.rb
  - jobclass/rebuild-rename.rb
- - jobclass/redis-export.rb
  - jobclass/s3-put.rb
  - jobclass/sql.rb
  - jobclass/streaming_load.rb
@@ -190,11 +157,9 @@ files:
  - lib/bricolage/logger.rb
  - lib/bricolage/loglocator.rb
  - lib/bricolage/loglocatorbuilder.rb
- - lib/bricolage/mysqldatasource.rb
  - lib/bricolage/parameters.rb
  - lib/bricolage/postgresconnection.rb
  - lib/bricolage/psqldatasource.rb
- - lib/bricolage/redisdatasource.rb
  - lib/bricolage/resource.rb
  - lib/bricolage/rubyjobclass.rb
  - lib/bricolage/s3datasource.rb
@@ -208,10 +173,6 @@ files:
  - lib/bricolage/variables.rb
  - lib/bricolage/version.rb
  - libexec/create-lockfile
- - libexec/mys3dump.jar
- - libexec/sqldump
- - libexec/sqldump.Darwin
- - libexec/sqldump.Linux
  - test/all.rb
  - test/home/Gemfile
  - test/home/Gemfile.lock
@@ -219,7 +180,6 @@ files:
  - test/home/config/development/database.yml
  - test/home/config/development/password.yml
  - test/home/config/development/variable.yml
- - test/home/config/streamingload.yml
  - test/home/data/20141002-1355_00.txt
  - test/home/data/20141002-1355_01.txt
  - test/home/data/20141002-1355_02.txt
data/jobclass/my-export.rb DELETED
@@ -1,40 +0,0 @@
- JobClass.define('my-export') {
-   parameters {|params|
-     params.add SQLFileParam.new(optional: true)
-     params.add DestFileParam.new
-     params.add SrcTableParam.new
-     params.add EnumParam.new('format', %w(json tsv csv), 'Target file format.', default: 'json')
-     params.add OptionalBoolParam.new('gzip', 'If true, compresses target file by gzip.')
-     params.add OptionalBoolParam.new('override', 'If true, clears target file. Otherwise causes error.')
-     params.add OptionalBoolParam.new('sqldump', 'If true, uses the sqldump command to dump, only when usable.')
-     params.add DataSourceParam.new('mysql')
-   }
-
-   declarations {|params|
-     sql_statement(params).declarations
-   }
-
-   script {|params, script|
-     script.task(params['data-source']) {|task|
-       task.export sql_statement(params),
-           path: params['dest-file'],
-           format: params['format'],
-           override: params['override'],
-           gzip: params['gzip'],
-           sqldump: params['sqldump']
-     }
-   }
-
-   def sql_statement(params)
-     if sql = params['sql-file']
-       sql
-     else
-       srcs = params['src-tables']
-       raise ParameterError, "src-tables must be singleton when no sql-file is given" unless srcs.size == 1
-       src_table_var = srcs.keys.first
-       stmt = SQLStatement.for_string("select * from $#{src_table_var};")
-       stmt.declarations = Declarations.new({src_table_var => src_table_var})
-       stmt
-     end
-   end
- }
data/jobclass/my-import-delta.rb DELETED
@@ -1,66 +0,0 @@
- require 'bricolage/psqldatasource'
- require 'bricolage/mysqldatasource'
-
- JobClass.define('my-import-delta') {
-   parameters {|params|
-     # S3Export
-     params.add SrcTableParam.new(optional: false)
-     params.add DataSourceParam.new('mysql', 'src-ds', 'Source data source.')
-     params.add SQLFileParam.new(optional: true)
-     params.add DataSourceParam.new('s3', 's3-ds', 'Temporary file storage.')
-     params.add DestFileParam.new('s3-prefix', 'PREFIX', 'Temporary S3 prefix.')
-     params.add KeyValuePairsParam.new('dump-options', 'KEY:VALUE', 'dump options.', optional: true)
-
-     # Delete, Load
-     params.add DataSourceParam.new('sql', 'dest-ds', 'Destination data source.')
-     params.add StringParam.new('delete-cond', 'SQL_EXPR', 'DELETE condition.')
-     params.add DestTableParam.new(optional: false)
-     params.add KeyValuePairsParam.new('options', 'OPTIONS', 'Loader options.',
-         optional: true, default: PSQLLoadOptions.new,
-         value_handler: lambda {|value, ctx, vars| PSQLLoadOptions.parse(value) })
-
-     # Misc
-     params.add OptionalBoolParam.new('analyze', 'ANALYZE table after SQL is executed.', default: true)
-     params.add OptionalBoolParam.new('vacuum', 'VACUUM table after SQL is executed.')
-     params.add OptionalBoolParam.new('vacuum-sort', 'VACUUM SORT table after SQL is executed.')
-
-     # All
-     params.add OptionalBoolParam.new('export', 'Runs EXPORT task.')
-     params.add OptionalBoolParam.new('load', 'Runs LOAD task.')
-     params.add OptionalBoolParam.new('gzip', 'Compress Temporary files.')
-   }
-
-   script {|params, script|
-     run_all = !params['export'] && !params['load']
-
-     # S3Export
-     if params['export'] || run_all
-       script.task(params['src-ds']) {|task|
-         task.s3export params['src-tables'].values.first.to_s,
-             params['sql-file'],
-             params['s3-ds'],
-             params['s3-prefix'],
-             params['gzip'],
-             dump_options: params['dump-options']
-       }
-     end
-
-     # Load
-     if params['load'] || run_all
-       script.task(params['dest-ds']) {|task|
-         task.transaction {
-           # DELETE
-           task.exec SQLStatement.delete_where(params['delete-cond']) if params['delete-cond']
-
-           # COPY
-           task.load params['s3-ds'], params['s3-prefix'], params['dest-table'],
-               'json', nil, params['options'].merge('gzip' => params['gzip'])
-         }
-
-         # VACUUM, ANALYZE
-         task.vacuum_if params['vacuum'], params['vacuum-sort'], params['dest-table']
-         task.analyze_if params['analyze'], params['dest-table']
-       }
-     end
-   }
- }
data/jobclass/my-import.rb DELETED
@@ -1,84 +0,0 @@
- require 'bricolage/psqldatasource'
- require 'bricolage/mysqldatasource'
-
- JobClass.define('my-import') {
-   parameters {|params|
-     # S3Export
-     params.add SrcTableParam.new(optional: false)
-     params.add DataSourceParam.new('mysql', 'src-ds', 'Source data source.')
-     params.add SQLFileParam.new(optional: true)
-     params.add DataSourceParam.new('s3', 's3-ds', 'Temporary file storage.')
-     params.add DestFileParam.new('s3-prefix', 'PREFIX', 'Temporary S3 prefix.')
-     params.add KeyValuePairsParam.new('dump-options', 'KEY:VALUE', 'dump options.', optional: true)
-
-     # Load
-     params.add DestTableParam.new(optional: false)
-     params.add DataSourceParam.new('sql', 'dest-ds', 'Destination data source.')
-     params.add KeyValuePairsParam.new('options', 'OPTIONS', 'Loader options.',
-         optional: true, default: PSQLLoadOptions.new,
-         value_handler: lambda {|value, ctx, vars| PSQLLoadOptions.parse(value) })
-     params.add SQLFileParam.new('table-def', 'PATH', 'Create table file.')
-     params.add OptionalBoolParam.new('no-backup', 'Do not backup current table with suffix "_old".', default: false)
-
-     # Misc
-     params.add OptionalBoolParam.new('analyze', 'ANALYZE table after SQL is executed.', default: true)
-     params.add OptionalBoolParam.new('vacuum', 'VACUUM table after SQL is executed.')
-     params.add OptionalBoolParam.new('vacuum-sort', 'VACUUM SORT table after SQL is executed.')
-     params.add KeyValuePairsParam.new('grant', 'KEY:VALUE', 'GRANT table after SQL is executed. (required keys: privilege, to)')
-
-     # All
-     params.add OptionalBoolParam.new('export', 'Runs EXPORT task.')
-     params.add OptionalBoolParam.new('put', 'Runs PUT task.')
-     params.add OptionalBoolParam.new('load', 'Runs LOAD task.')
-     params.add OptionalBoolParam.new('gzip', 'Compress Temporary files.')
-   }
-
-   script {|params, script|
-     run_all = !params['export'] && !params['put'] && !params['load']
-
-     # S3Export
-     if params['export'] || run_all
-       script.task(params['src-ds']) {|task|
-         task.s3export params['src-tables'].keys.first,
-             params['sql-file'],
-             params['s3-ds'],
-             params['s3-prefix'],
-             params['gzip'],
-             dump_options: params['dump-options']
-       }
-     end
-
-     # Load
-     if params['load'] || run_all
-       script.task(params['dest-ds']) {|task|
-         prev_table = '${dest_table}_old'
-         work_table = '${dest_table}_wk'
-
-         task.transaction {
-           # CREATE
-           task.drop_force prev_table
-           task.drop_force work_table
-           task.exec params['table-def'].replace(/\$\{?dest_table\}?\b/, work_table)
-
-           # COPY
-           task.load params['s3-ds'], params['s3-prefix'], work_table,
-               'json', nil, params['options'].merge('gzip' => params['gzip'])
-
-           # GRANT, ANALYZE
-           task.grant_if params['grant'], work_table
-           task.analyze_if params['analyze'], work_table
-
-           # RENAME
-           task.create_dummy_table '${dest_table}'
-           task.rename_table params['dest-table'].to_s, "#{params['dest-table'].name}_old"
-           task.rename_table work_table, params['dest-table'].name
-         }
-
-         task.drop_force prev_table if params['no-backup']
-
-         # VACUUM: vacuum is needless for newly created table, applying vacuum after exposure is not a problem.
-         task.vacuum_if params['vacuum'], params['vacuum-sort'], params['dest-table'].to_s
-       }
-     end
-   }
- }
data/jobclass/my-migrate.rb DELETED
@@ -1,116 +0,0 @@
- require 'bricolage/psqldatasource'
-
- JobClass.define('my-migrate') {
-   parameters {|params|
-     # Export
-     params.add SrcTableParam.new(optional: false)
-     params.add DataSourceParam.new('mysql', 'src-ds', 'Source data source.')
-     params.add DestFileParam.new('tmp-file', 'PATH', 'Temporary local file path.')
-     params.add OptionalBoolParam.new('sqldump', 'If true, uses the sqldump command to dump, only when available.', default: true)
-     params.add SQLFileParam.new(optional: true)
-
-     # Put
-     params.add DestFileParam.new('s3-file', 'PATH', 'Temporary S3 file path.')
-     params.add DataSourceParam.new('s3', 's3-ds', 'Temporary file storage.')
-     params.add OptionalBoolParam.new('override', 'If true, overwrite s3 target file. Otherwise causes error.')
-     params.add OptionalBoolParam.new('remove-tmp', 'Removes temporary local files after S3-PUT succeeds.')
-
-     # Load
-     params.add DestTableParam.new(optional: false)
-     params.add DataSourceParam.new('sql', 'dest-ds', 'Destination data source.')
-     params.add KeyValuePairsParam.new('options', 'OPTIONS', 'Loader options.',
-         optional: true, default: PSQLLoadOptions.new,
-         value_handler: lambda {|value, ctx, vars| PSQLLoadOptions.parse(value) })
-     params.add SQLFileParam.new('table-def', 'PATH', 'Create table file.')
-     params.add OptionalBoolParam.new('no-backup', 'Do not backup current table with suffix "_old".', default: false)
-
-     # Misc
-     params.add OptionalBoolParam.new('analyze', 'ANALYZE table after SQL is executed.', default: true)
-     params.add OptionalBoolParam.new('vacuum', 'VACUUM table after SQL is executed.')
-     params.add OptionalBoolParam.new('vacuum-sort', 'VACUUM SORT table after SQL is executed.')
-     params.add KeyValuePairsParam.new('grant', 'KEY:VALUE', 'GRANT table after SQL is executed. (required keys: privilege, to)')
-
-     # All
-     params.add OptionalBoolParam.new('export', 'Runs EXPORT task.')
-     params.add OptionalBoolParam.new('put', 'Runs PUT task.')
-     params.add OptionalBoolParam.new('load', 'Runs LOAD task.')
-     params.add OptionalBoolParam.new('gzip', 'If true, compresses target file by gzip.', default: true)
-   }
-
-   declarations {|params|
-     decls = sql_statement(params).declarations
-     decls.declare 'dest-table', nil
-     decls
-   }
-
-   script {|params, script|
-     run_all = !params['export'] && !params['put'] && !params['load']
-
-     # Export
-     if params['export'] || run_all
-       script.task(params['src-ds']) {|task|
-         task.export sql_statement(params),
-             path: params['tmp-file'],
-             format: 'json',
-             override: true,
-             gzip: params['gzip'],
-             sqldump: params['sqldump']
-       }
-     end
-
-     # Put
-     if params['put'] || run_all
-       script.task(params['s3-ds']) {|task|
-         task.put params['tmp-file'], params['s3-file'], check_args: false
-       }
-       if params['remove-tmp']
-         script.task(params.file_ds) {|task|
-           task.remove params['tmp-file']
-         }
-       end
-     end
-
-     # Load
-     if params['load'] || run_all
-       script.task(params['dest-ds']) {|task|
-         prev_table = '${dest_table}_old'
-         work_table = '${dest_table}_wk'
-
-         task.transaction {
-           # CREATE
-           task.drop_force prev_table
-           task.drop_force work_table
-           task.exec params['table-def'].replace(/\$\{?dest_table\}?\b/, work_table)
-
-           # COPY
-           task.load params['s3-ds'], params['s3-file'], work_table,
-               'json', nil, params['options'].merge('gzip' => params['gzip'])
-
-           # GRANT, ANALYZE
-           task.grant_if params['grant'], work_table
-           task.analyze_if params['analyze'], work_table
-
-           # RENAME
-           task.create_dummy_table '${dest_table}'
-           task.rename_table params['dest-table'].to_s, "#{params['dest-table'].name}_old"
-           task.rename_table work_table, params['dest-table'].name
-         }
-
-         task.drop_force prev_table if params['no-backup']
-
-         # VACUUM: vacuum is needless for newly created table, applying vacuum after exposure is not a problem.
-         task.vacuum_if params['vacuum'], params['vacuum-sort'], params['dest-table'].to_s
-       }
-     end
-   }
-
-   def sql_statement(params)
-     return params['sql-file'] if params['sql-file']
-     srcs = params['src-tables']
-     raise ParameterError, "src-tables must be singleton when no sql-file is given" unless srcs.size == 1
-     src_table_var = srcs.keys.first
-     stmt = SQLStatement.for_string("select * from $#{src_table_var};")
-     stmt.declarations = Declarations.new({src_table_var => src_table_var})
-     stmt
-   end
- }
data/jobclass/redis-export.rb DELETED
@@ -1,42 +0,0 @@
- require 'bricolage/psqldatasource'
- require 'bricolage/redisdatasource'
- require 'redis'
-
- JobClass.define('redis-export') {
-   parameters {|params|
-     # Export
-     params.add DataSourceParam.new('psql', 'src-ds', 'Source data source.')
-     params.add SrcTableParam.new(optional: false)
-     params.add SQLFileParam.new(optional: true)
-
-     # Redis import
-     params.add DataSourceParam.new('redis', 'dest-ds', 'Redis cluster')
-     params.add StringParam.new('key-column', 'REDIS_KEY', 'Redis object key. default: id', optional: true)
-     params.add StringParam.new('prefix', 'REDIS_PREFIX', 'Redis object key prefix', optional: true)
-     params.add StringParam.new('encode', 'REDIS_ENCODE', 'Redis object encoding. default: hash', optional: true)
-     params.add StringParam.new('expire', 'REDIS_TTL', 'Redis object TTL. default: none', optional: true)
-   }
-
-   script {|params, script|
-     # Export
-     script.task(params['dest-ds']) {|task|
-       task.import params['src-ds'],
-           params['src-tables'].first,
-           sql_statement(params),
-           params['key-column'] || "id",
-           params['prefix'],
-           params['encode'] || "hash",
-           expire: params['expire'].to_i
-     }
-   }
-
-   def sql_statement(params)
-     return params['sql-file'] if params['sql-file']
-     srcs = params['src-tables']
-     raise ParameterError, "src-tables must be singleton when no sql-file is given" unless srcs.size == 1
-     src_table_var = srcs.keys.first
-     stmt = SQLStatement.for_string("select * from $#{src_table_var};")
-     stmt.declarations = Declarations.new({src_table_var => src_table_var})
-     stmt
-   end
- }
data/lib/bricolage/mysqldatasource.rb DELETED
@@ -1,363 +0,0 @@
- require 'bricolage/datasource'
- require 'mysql2'
- require 'json'
- require 'csv'
- require 'stringio'
- require 'open3'
-
- module Bricolage
-
-   class MySQLDataSource < DataSource
-     declare_type 'mysql'
-
-     def initialize(**mysql_options)
-       @mysql_options = mysql_options
-       @client = nil
-     end
-
-     attr_reader :mysql_options
-
-     def host
-       @mysql_options[:host]
-     end
-
-     def port
-       @mysql_options[:port]
-     end
-
-     def username
-       @mysql_options[:username]
-     end
-
-     def password
-       @mysql_options[:password]
-     end
-
-     def database
-       @mysql_options[:database]
-     end
-
-     def new_task
-       MySQLTask.new(self)
-     end
-
-     def open
-       @client = Mysql2::Client.new(**@mysql_options)
-       begin
-         yield self
-       ensure
-         c = @client
-         @client = nil
-         c.close
-       end
-     end
-
-     def query(sql, **opts)
-       logger.info "[SQL] #{sql}"
-       connection_check
-       @client.query(sql, **opts)
-     end
-
-     private
-
-     def connection_check
-       unless @client
-         raise FatalError, "#{self.class} used outside of \#open block"
-       end
-     end
-   end
-
-   class MySQLTask < DataSourceTask
-     def export(stmt, path: nil, format: nil, override: false, gzip: false, sqldump: false)
-       add Export.new(stmt, path: path, format: format, override: override, gzip: gzip, sqldump: sqldump)
-     end
-
-     class Export < Action
-       def initialize(stmt, path: nil, format: nil, override: false, gzip: false, sqldump: false)
-         @statement = stmt
-         @path = path
-         @format = format
-         @override = override
-         @gzip = gzip
-         @sqldump = sqldump
-       end
-
-       def bind(*args)
-         @statement.bind(*args)
-       end
-
-       def source
-         @statement.stripped_source
-       end
-
-       def run
-         if @sqldump and sqldump_available? and sqldump_usable?
-           export_by_sqldump
-         else
-           export_by_ruby
-         end
-         JobResult.success
-       end
-
-       def export_by_sqldump
-         cmds = [[{"SQLDUMP_PASSWORD" => ds.password}, sqldump_path.to_s, "--#{@format}", ds.host, ds.port.to_s, ds.username, ds.database, @statement.stripped_source]]
-         cmds.push [GZIP_COMMAND] if @gzip
-         cmds.last.push({out: @path.to_s})
-         ds.logger.info '[CMD] ' + format_pipeline(cmds)
-         statuses = Open3.pipeline(*cmds)
-         statuses.each_with_index do |st, idx|
-           unless st.success?
-             cmd = cmds[idx].first
-             raise JobFailure, "sqldump failed (status #{st.to_i})"
-           end
-         end
-       end
-
-       def format_pipeline(cmds)
-         cmds = cmds.map {|args| args[0].kind_of?(Hash) ? args[1..-1] : args.dup }   # do not show env
-         cmds.map {|args| %Q("#{args.join('" "')}") }.join(' | ')
-       end
-
-       def sqldump_available?
-         sqldump_real_path.executable?
-       end
-
-       def sqldump_path
-         Pathname(__dir__).parent.parent + "libexec/sqldump"
-       end
-
-       def sqldump_real_path
-         Pathname("#{sqldump_path}.#{platform_name}")
-       end
-
-       def platform_name
-         @platform_name ||= `uname -s`.strip
-       end
-
-       def sqldump_usable?
-         %w[json tsv].include?(@format)
-       end
-
-       def export_by_ruby
-         ds.logger.info "exporting table into #{@path} ..."
-         count = 0
-         open_target_file(@path) {|f|
-           writer_class = WRITER_CLASSES[@format] or raise ArgumentError, "unknown export format: #{@format.inspect}"
-           writer = writer_class.new(f)
-           rs = ds.query(@statement.stripped_source, as: writer_class.record_format, stream: true, cache_rows: false)
-           ds.logger.info "got result set, writing..."
-           rs.each do |values|
-             writer.write_record values
-             count += 1
-             ds.logger.info "#{count} records exported..." if count % 10_0000 == 0
-           end
-         }
-         ds.logger.info "#{count} records exported; export finished"
-       end
-
-       private
-
-       # FIXME: parameterize
-       GZIP_COMMAND = 'gzip'
-
-       def open_target_file(path, &block)
-         unless @override
-           raise JobFailure, "destination file already exists: #{path}" if File.exist?(path)
-         end
-         if @gzip
-           ds.logger.info "enable compression: gzip"
-           IO.popen(%Q(#{GZIP_COMMAND} > "#{path}"), 'w', &block)
-         else
-           File.open(path, 'w', &block)
-         end
-       end
-     end
-
-     def s3export(table, stmt, s3ds, prefix, gzip, dump_options)
-       options = dump_options.nil? ? {} : dump_options[:dump_options]
-       add S3Export.new(table, stmt, s3ds, prefix, gzip: gzip,
-           format: options['format'],
-           partition_column: options['partition_column'],
-           partition_number: options['partition_number'],
-           write_concurrency: options['write_concurrency'],
-           rotation_size: options['rotation_size'],
-           delete_objects: options['delete_objects'],
-           object_key_delimiter: options['object_key_delimiter'],
-           src_zone_offset: options['src_zone_offset'],
-           dst_zone_offset: options['dst_zone_offset'])
-     end
-
-     class S3Export < Action
-
-       def initialize(table, stmt, s3ds, prefix, gzip: true,
-           format: "json",
-           partition_column: nil,
-           partition_number: 4,
-           write_concurrency: 4,
-           rotation_size: nil,
-           delete_objects: false,
-           object_key_delimiter: nil,
-           src_zone_offset: nil,
-           dst_zone_offset: nil)
-         @table = table
-         @statement = stmt
-         @s3ds = s3ds
-         @prefix = build_prefix @s3ds.prefix, prefix
-         @format = format
-         @gzip = gzip
-         @partition_column = partition_column
-         @partition_number = partition_number
-         @write_concurrency = write_concurrency
-         @rotation_size = rotation_size
-         @delete_objects = delete_objects
-         @object_key_delimiter = object_key_delimiter
-         @src_zone_offset = src_zone_offset
-         @dst_zone_offset = dst_zone_offset
-       end
-
-       def run
-         s3export
-         JobResult.success
-       end
-
-       def bind(*args)
-         @statement.bind(*args) if @statement
-       end
-
-       def source
-         "-- myexport #{@table} -> #{@s3ds.bucket_name}/#{@prefix}" +
-             (@statement ? "\n#{@statement.stripped_source}" : "")
-       end
-
-       def s3export
-         cmd = build_cmd(command_parameters)
-         ds.logger.info "[CMD] #{cmd}"
-         out, st = Open3.capture2e(environment_variables, cmd)
-         ds.logger.info "[CMDOUT] #{out}"
-         unless st.success?
-           msg = extract_exception_message(out)
-           raise JobFailure, "mys3dump failed (status: #{st.to_i}): #{msg}"
-         end
-       end
-
-       def environment_variables
-         {
-           'AWS_ACCESS_KEY_ID' => @s3ds.access_key,
-           'AWS_SECRET_ACCESS_KEY' => @s3ds.secret_key,
-           'MYS3DUMP_PASSWORD' => ds.password
-         }
-       end
-
-       def command_parameters
-         params = {
-           jar: mys3dump_path.to_s,
-           h: ds.host,
-           P: ds.port.to_s,
-           D: ds.database,
-           u: ds.username,
-           #p: ds.password,
-           o: connection_property,
-           t: @table,
-           b: @s3ds.bucket.name,
-           x: @prefix
-         }
-         params[:q] = @statement.stripped_source.chomp(';') if @statement
-         params[:f] = @format if @format
-         params[:C] = nil if @gzip
-         params[:c] = @partition_column if @partition_column
-         params[:n] = @partition_number if @partition_number
-         params[:w] = @write_concurrency if @write_concurrency
-         params[:r] = @rotation_size if @rotation_size
-         params[:d] = nil if @delete_objects
-         params[:k] = @object_key_delimiter if @object_key_delimiter
-         if src_zone_offset = @src_zone_offset || ds.mysql_options[:src_zone_offset]
-           params[:S] = src_zone_offset
-         end
-         if dst_zone_offset = @dst_zone_offset || ds.mysql_options[:dst_zone_offset]
-           params[:T] = dst_zone_offset
-         end
-         params
-       end
-
-       OPTION_MAP = {
-         encoding: 'useUnicode=true&characterEncoding',
-         read_timeout: 'netTimeoutForStreamingResults',
-         connect_timeout: 'connectTimeout',
-         reconnect: 'autoReconnect',
-         collation: 'connectionCollation'
-       }
-
-       def connection_property
-         ds.mysql_options.map {|k, v| opt = OPTION_MAP[k] ; opt ? "#{opt}=#{v}" : nil }.compact.join('&')
-       end
-
-       def build_prefix(ds_prefix, pm_prefix)
-         ((ds_prefix || "") + "//" + (pm_prefix.to_s || "")).gsub(%r<\A/>, '').gsub(%r<//>, '/')
-       end
-
-       def mys3dump_path
-         Pathname(__dir__).parent.parent + "libexec/mys3dump.jar"
-       end
-
-       def build_cmd(options)
-         (['java'] + options.flat_map {|k, v| v ? ["-#{k}", v.to_s] : ["-#{k}"] }.map {|o| %Q("#{o}") }).join(" ")
-       end
-
-       def extract_exception_message(out)
-         out.lines do |line|
-           if /^.*Exception: (?<msg>.*)$/ =~ line
-             return msg
-           end
-         end
-       end
-     end
-
-     WRITER_CLASSES = {}
-
-     class JSONWriter
-       def JSONWriter.record_format
-         :hash
-       end
-
-       def initialize(f)
-         @f = f
-       end
-
-       def write_record(values)
-         @f.puts JSON.dump(values)
-       end
-     end
-     WRITER_CLASSES['json'] = JSONWriter
-
-     class TSVWriter
-       def TSVWriter.record_format
-         :array
-       end
-
-       def initialize(f)
-         @f = f
-       end
-
-       def write_record(values)
-         @f.puts values.join("\t")
-       end
-     end
-     WRITER_CLASSES['tsv'] = TSVWriter
-
-     class CSVWriter
-       def CSVWriter.record_format
-         :array
-       end
-
-       def initialize(f)
-         @csv = CSV.new(f)
-       end
-
-       def write_record(values)
-         @csv.add_row values
-       end
-     end
-     WRITER_CLASSES['csv'] = CSVWriter
-   end
-
- end
data/lib/bricolage/redisdatasource.rb DELETED
@@ -1,153 +0,0 @@
- require 'bricolage/datasource'
- require 'bricolage/commandutils'
- require 'redis'
- require 'json'
-
- module Bricolage
-
-   class RedisDataSource < DataSource
-     declare_type 'redis'
-
-     def initialize(host: 'localhost', port: 6380, **options)
-       @host = host
-       @port = port
-       @options = options
-     end
-
-     attr_reader :host
-     attr_reader :port
-     attr_reader :options
-
-     def new_task
-       RedisTask.new(self)
-     end
-
-     def open
-       client = Redis.new(host: @host, port: @port, **@options)
-       yield client
-     end
-   end
-
-   class RedisTask < DataSourceTask
-     def import(src, table, query, key_column, prefix, encode, expire: nil)
-       add Import.new(src, table, query, key_column, prefix, encode, expire)
-     end
-
-     class Import < Action
-       def initialize(src, table, query, key_column, prefix, encode, expire)
-         @src = src
-         @table = table
-         @query = query
-         @key_columns = key_column.split(',').map(&:strip)
-         @prefix = prefix || "#{@table.last.schema}_#{@table.last.name}_"
-         @encode = encode
-         @expire = expire
-       end
-
-       def bind(*args)
-         @query.bind(*args)
-       end
-
-       def source
-         @query.stripped_source
-       end
-
-       def run
-         logger = ds.logger
-         begin
-           logger.info "Key Pattern: #{@prefix}<#{@key_columns.join('_')}>"
-           logger.info "Encode: #{@encode}"
-           logger.info "Expire: #{@expire}"
-           ds.open {|client|
-             writer = RedisRowWriter.for_encode(@encode).new(client, @prefix, @key_columns)
-             import writer
-           }
-         rescue => ex
-           logger.exception ex
-           raise JobFailure, ex.message
-         end
-         JobResult.success
-       end
-
-       BATCH_SIZE = 5000
-
-       def import(writer)
-         count = 0
-         @src.query_batch(source, BATCH_SIZE) do |rs|
-           writer.pipelined {
-             rs.each do |row|
-               writer.write(row)
-               count += 1
-               ds.logger.info "transferred: #{count} rows" if count % 100_0000 == 0
-             end
-           }
-         end
-         ds.logger.info "all rows written: #{count} rows"
-       end
-     end
-   end
-
-   class RedisRowWriter
-     def RedisRowWriter.for_encode(encode)
-       case encode
-       when 'hash' then RedisHashRowWriter
-       when 'json' then RedisJSONRowWriter
-       else
-         raise ParameterError, "unsupported Redis encode: #{encode.inspect}"
-       end
-     end
-
-     def initialize(client, prefix, key_columns)
-       @client = client
-       @prefix = prefix
-       @key_columns = key_columns
-     end
-
-     attr_reader :prefix
-     attr_reader :write_count
-
-     def key(row)
-       @prefix + @key_columns.map {|k| row[k] }.join('_')
-     end
-
-     def value_columns(row)
-       r = row.dup
-       @key_columns.each do |key|
-         r.delete(key)
-       end
-       r.empty? ? {1 => 1} : r
-     end
-
-     def pipelined(&block)
-       @client.pipelined(&block)
-     end
-
-     def write(row)
-       key = key(row)
-       futures = do_write(key, value_columns(row))
-       futures.push @client.expire(key, @expire) if @expire
-       futures
-     end
-
-     def expire
-       @client.expire(key, @expire)
-     end
-   end
-
-   class RedisHashRowWriter < RedisRowWriter
-     def do_write(key, values)
-       # set a value for each key:field pair
-       values.map {|field, value|
-         @client.hset(key, field, value)
-       }
-     end
-   end
-
-   class RedisJSONRowWriter < RedisRowWriter
-     def do_write(key, values)
-       future = @client.set(key, JSON.generate(values))
-       [future]
-     end
-   end
-
- end
data/libexec/mys3dump.jar DELETED
Binary file
data/libexec/sqldump DELETED
@@ -1,9 +0,0 @@
- #!/bin/sh
-
- binary="$0.$(uname -s)"
- if ! [[ -x $binary ]]
- then
-   echo "$0: error: sqldump does not support $(uname -s)" 1>&2
-   exit 1
- fi
- exec "$binary" "$@"
data/libexec/sqldump.Darwin DELETED
Binary file
data/libexec/sqldump.Linux DELETED
Binary file
data/test/home/config/streamingload.yml DELETED
@@ -1,8 +0,0 @@
- redshift-ds: sql
- event-queue-ds: sqs_strload_event
- task-queue-ds: sqs_strload_task
- url_patterns:
-   -
-     url: "s3://redshift-copy-buffer/development/log_stream/\\d{8}_\\d{4}_\\d+.*\\.gz"
-     schema: "aamine"
-     table: "pv_log"