bricolage 5.19.0 → 5.19.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4e09f9be942c6431360ac3ec8afbd9cae5c03b06
4
- data.tar.gz: b9c2dc9c5a91b42b0e60fb43c089b12f58670128
3
+ metadata.gz: 08ee4bdc2cc0df34636a9d370bd8dcc658f1fa5a
4
+ data.tar.gz: a26fcb9a3150e12572bacc4bd98168ebab68888f
5
5
  SHA512:
6
- metadata.gz: be674ea50692704964d7360c35092e116078b5fb03ac0d2e2f620eb9abb083ffbc9a335d384e4f1c5a68594da4ff00e528097f707da6e03a2925e20a015fe7b5
7
- data.tar.gz: 69241018dcfa1862b327bcc9d43404890b91bc38bcd57836b5a34187507318a3ba9907de33534fef15c606bc01a08e5f4cd6d70ba2c0e0215d9614f5005af7fb
6
+ metadata.gz: 5915c56686b6757f8a059aa3d7bd05ae14ff0d2b8d806cca59392698fcd1ffcdf8e574a3b1204208c9b4f7dbefe92a8984165920b4a400b1c282950e3cc7c649
7
+ data.tar.gz: 30c0701ff39c2e15c2723a0fb4aac188a655c0ceea04c73df7c3097594367e316a563b51a62958cd3e3747f7b0b2ac38caaa7dba0081baac7e3751f4337944c5
@@ -18,6 +18,8 @@ class StreamingLoadJobClass < RubyJobClass
18
18
  optional: true, default: Bricolage::PSQLLoadOptions.new,
19
19
  value_handler: lambda {|value, ctx, vars| Bricolage::PSQLLoadOptions.parse(value) })
20
20
  params.add Bricolage::DataSourceParam.new('s3', 's3-ds', 'S3 data source.')
21
+ params.add Bricolage::StringParam.new('ctl-prefix', 'S3_PREFIX', 'S3 object key prefix for control files. (default: ${queue-path}/ctl)', optional: true)
22
+ params.add Bricolage::OptionalBoolParam.new('keep-ctl', 'Does not delete control files if true.')
21
23
  params.add Bricolage::StringParam.new('queue-path', 'S3_PATH', 'S3 path for data file queue.')
22
24
  params.add Bricolage::StringParam.new('persistent-path', 'S3_PATH', 'S3 path for persistent data file store.')
23
25
  params.add Bricolage::StringParam.new('file-name', 'PATTERN', 'name pattern of target data file.')
@@ -61,6 +63,7 @@ class StreamingLoadJobClass < RubyJobClass
61
63
  RedshiftStreamingLoader.new(
62
64
  data_source: ds,
63
65
  queue: make_s3_queue(params),
66
+ keep_ctl: params['keep-ctl'],
64
67
  table: string(params['dest-table']),
65
68
  work_table: string(params['work-table']),
66
69
  log_table: string(params['log-table']),
@@ -76,6 +79,7 @@ class StreamingLoadJobClass < RubyJobClass
76
79
  ds = params['s3-ds']
77
80
  S3Queue.new(
78
81
  data_source: ds,
82
+ ctl_prefix: (params['ctl-prefix'] || "#{params['queue-path']}/ctl"),
79
83
  queue_path: params['queue-path'],
80
84
  persistent_path: params['persistent-path'],
81
85
  file_name: params['file-name'],
@@ -88,12 +92,13 @@ class StreamingLoadJobClass < RubyJobClass
88
92
  end
89
93
 
90
94
  class RedshiftStreamingLoader
91
- def initialize(data_source:, queue:,
95
+ def initialize(data_source:, queue:, keep_ctl:,
92
96
  table:, work_table: nil, log_table: nil, load_options: nil,
93
97
  sql: nil,
94
98
  logger:, noop: false, load_only: false)
95
99
  @ds = data_source
96
100
  @src = queue
101
+ @keep_ctl = keep_ctl
97
102
  @table = table
98
103
  @work_table = work_table
99
104
  @log_table = log_table
@@ -200,13 +205,17 @@ class StreamingLoadJobClass < RubyJobClass
200
205
  end
201
206
 
202
207
  def create_manifest_file(objects)
203
- manifest_name = "manifest-#{@job_process_id}.json"
208
+ manifest_name = if @keep_ctl
209
+ "#{Time.now.strftime('%Y/%m/%d')}/#{@job_process_id}-#{@table}.json"
210
+ else
211
+ "manifest-#{@job_process_id}.json"
212
+ end
204
213
  @logger.info "creating manifest: #{manifest_name}"
205
214
  json = make_manifest_json(objects)
206
215
  @logger.info "manifest:\n" + json
207
216
  url = @src.put_control_file(manifest_name, json, noop: @noop)
208
217
  yield url
209
- @src.remove_control_file(File.basename(url), noop: @noop)
218
+ @src.remove_control_file(File.basename(url), noop: @noop) unless @keep_ctl
210
219
  end
211
220
 
212
221
  def make_manifest_json(objects)
@@ -359,8 +368,9 @@ class StreamingLoadJobClass < RubyJobClass
359
368
  class S3Queue
360
369
  extend Forwardable
361
370
 
362
- def initialize(data_source:, queue_path:, persistent_path:, file_name:, logger:)
371
+ def initialize(data_source:, ctl_prefix:, queue_path:, persistent_path:, file_name:, logger:)
363
372
  @ds = data_source
373
+ @ctl_prefix = ctl_prefix
364
374
  @queue_path = queue_path
365
375
  @persistent_path = persistent_path
366
376
  @file_name = file_name
@@ -399,7 +409,8 @@ class StreamingLoadJobClass < RubyJobClass
399
409
  end
400
410
 
401
411
  def control_file_path(name)
402
- "#{queue_path}/ctl/#{name}"
412
+ prefix = @ctl_prefix || "#{queue_path}/ctl"
413
+ "#{prefix}/#{name}"
403
414
  end
404
415
 
405
416
  def each(&block)
@@ -183,7 +183,9 @@ module Bricolage
183
183
  write_concurrency: options['write_concurrency'],
184
184
  rotation_size: options['rotation_size'],
185
185
  delete_objects: options['delete_objects'],
186
- object_key_delimiter: options['object_key_delimiter'])
186
+ object_key_delimiter: options['object_key_delimiter'],
187
+ src_zone_offset: options['src_zone_offset'],
188
+ dst_zone_offset: options['dst_zone_offset'])
187
189
  end
188
190
 
189
191
  class S3Export < Action
@@ -195,7 +197,9 @@ module Bricolage
195
197
  write_concurrency: 4,
196
198
  rotation_size: nil,
197
199
  delete_objects: false,
198
- object_key_delimiter: nil)
200
+ object_key_delimiter: nil,
201
+ src_zone_offset: nil,
202
+ dst_zone_offset: nil)
199
203
  @table = table
200
204
  @statement = stmt
201
205
  @s3ds = s3ds
@@ -208,6 +212,8 @@ module Bricolage
208
212
  @rotation_size = rotation_size
209
213
  @delete_objects = delete_objects
210
214
  @object_key_delimiter = object_key_delimiter
215
+ @src_zone_offset = src_zone_offset
216
+ @dst_zone_offset = dst_zone_offset
211
217
  end
212
218
 
213
219
  def run
@@ -250,6 +256,12 @@ module Bricolage
250
256
  params[:r] = @rotation_size if @rotation_size
251
257
  params[:d] = nil if @delete_objects
252
258
  params[:k] = @object_key_delimiter if @object_key_delimiter
259
+ if src_zone_offset = @src_zone_offset || ds.mysql_options[:src_zone_offset]
260
+ params[:S] = src_zone_offset
261
+ end
262
+ if dst_zone_offset = @dst_zone_offset || ds.mysql_options[:dst_zone_offset]
263
+ params[:T] = dst_zone_offset
264
+ end
253
265
  params
254
266
  end
255
267
 
@@ -1,4 +1,4 @@
1
1
  module Bricolage
2
2
  APPLICATION_NAME = 'Bricolage'
3
- VERSION = '5.19.0'
3
+ VERSION = '5.19.1'
4
4
  end
data/libexec/mys3dump.jar CHANGED
Binary file
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: ../..
3
3
  specs:
4
- bricolage (5.19.0)
4
+ bricolage (5.19.1)
5
5
  aws-sdk (~> 2)
6
6
  mysql2
7
7
  pg (~> 0.18.0)
@@ -11,12 +11,12 @@ PATH
11
11
  GEM
12
12
  remote: https://rubygems.org/
13
13
  specs:
14
- aws-sdk (2.6.7)
15
- aws-sdk-resources (= 2.6.7)
16
- aws-sdk-core (2.6.7)
14
+ aws-sdk (2.6.14)
15
+ aws-sdk-resources (= 2.6.14)
16
+ aws-sdk-core (2.6.14)
17
17
  jmespath (~> 1.0)
18
- aws-sdk-resources (2.6.7)
19
- aws-sdk-core (= 2.6.7)
18
+ aws-sdk-resources (2.6.14)
19
+ aws-sdk-core (= 2.6.14)
20
20
  coderay (1.1.0)
21
21
  fluent-logger (0.5.1)
22
22
  msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
@@ -47,10 +47,10 @@ GEM
47
47
  td-logger (~> 0.3.21)
48
48
  yajl-ruby (~> 1.1)
49
49
  zip-zip (~> 0.3)
50
- td-client (0.8.83)
51
- httpclient (~> 2.7)
50
+ td-client (0.8.84)
51
+ httpclient (>= 2.7)
52
52
  json (>= 1.7.6)
53
- msgpack (>= 0.4.4, < 0.8.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
53
+ msgpack (>= 0.5.6, < 2)
54
54
  td-logger (0.3.25)
55
55
  fluent-logger (~> 0.5.0)
56
56
  msgpack (>= 0.4.4, < 0.8.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
@@ -67,4 +67,4 @@ DEPENDENCIES
67
67
  pry
68
68
 
69
69
  BUNDLED WITH
70
- 1.13.2
70
+ 1.13.6
@@ -42,16 +42,6 @@ postgres:
42
42
  encoding: utf8
43
43
  sql_log_level: DEBUG
44
44
 
45
- td:
46
- type: td
47
- database: logs
48
- apikey: <%= password 'td_tabemiru' %>
49
-
50
- td_search_log:
51
- type: td
52
- database: search_log
53
- apikey: <%= password 'td_tabemiru' %>
54
-
55
45
  s3:
56
46
  type: s3
57
47
  endpoint: "s3-ap-northeast-1.amazonaws.com"
@@ -74,27 +64,35 @@ mysql:
74
64
  password: <%= password 'mysql_shared_work_readonly' %>
75
65
  encoding: utf8
76
66
 
77
- s3_strload:
78
- type: s3
79
- region: "ap-northeast-1"
80
- endpoint: "s3-ap-northeast-1.amazonaws.com"
81
- bucket: tabemiru-data.ap-northeast-1
82
- prefix: tmp/strload
83
- access_key_id: "<%= password 'aws_access_key_id' %>"
84
- secret_access_key: "<%= password 'aws_secret_access_key' %>"
67
+ sqs_preproc:
68
+ type: sqs
69
+ url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/bricolage-preproc-dev"
70
+ max_number_of_messages: 10
71
+ visibility_timeout: 60
85
72
 
86
- sqs_strload_event:
73
+ sqs_dispatch:
87
74
  type: sqs
88
75
  url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/log-stream-dev"
89
76
  max_number_of_messages: 10
90
77
  visibility_timeout: 60
91
- access_key_id: "<%= password 'aws_access_key_id' %>"
92
- secret_access_key: "<%= password 'aws_secret_access_key' %>"
93
78
 
94
- sqs_strload_task:
79
+ sqs_task:
95
80
  type: sqs
96
81
  url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/bricolage-load-tasks-dev"
97
82
  max_number_of_messages: 1
98
83
  visibility_timeout: 180
99
- access_key_id: "<%= password 'aws_access_key_id' %>"
100
- secret_access_key: "<%= password 'aws_secret_access_key' %>"
84
+
85
+ sns:
86
+ type: sns
87
+ region: "ap-northeast-1"
88
+ topic_arn: "arn:aws:sns:ap-northeast-1:789035092620:dwh-service-notification"
89
+
90
+ td:
91
+ type: td
92
+ database: logs
93
+ apikey: <%= password 'td_tabemiru' %>
94
+
95
+ td_search_log:
96
+ type: td
97
+ database: search_log
98
+ apikey: <%= password 'td_tabemiru' %>
@@ -0,0 +1,7 @@
1
+ #!/bin/sh
2
+
3
+ for i in data/2*.txt
4
+ do
5
+ aws s3 cp $i s3://tabemiru-data.ap-northeast-1/dev/queue/
6
+ done
7
+ aws s3 rm --recursive s3://tabemiru-data.ap-northeast-1/dev/save/
@@ -1,12 +1,14 @@
1
1
  class: streaming_load
2
2
 
3
3
  s3-ds: s3
4
+ ctl-prefix: ctl
5
+ keep-ctl: true
4
6
  queue-path: queue
5
- persistent-path: save/year=%Y/month=%m/day=%d/hour=%H
7
+ persistent-path: save/%Y/%m/%d
6
8
  file-name: "%Y%m%d-%H%M_%Q.txt"
7
9
 
8
10
  redshift-ds: sql
9
- dest-table: load_test
10
- work-table: load_test_wk
11
- log-table: load_test_l
11
+ dest-table: $test_schema.load_test
12
+ work-table: $test_schema.load_test_wk
13
+ log-table: $test_schema.load_test_l
12
14
  load-options: "delimiter '\\t'"
@@ -6,10 +6,8 @@ s3-ds: s3
6
6
  s3-prefix: shimpeko/test-abc-
7
7
  gzip: true
8
8
  dump-options:
9
- partition_column: id
10
- partition_number: 8
11
- write_concurrency: 16
12
- rotation_size: 16000000
9
+ src_zone_offset: "+00:00"
10
+ dst_zone_offset: "+09:00"
13
11
  delete_objects: true
14
12
  dest-ds: sql
15
13
  dest-table: $test_schema.users
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bricolage
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.19.0
4
+ version: 5.19.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-17 00:00:00.000000000 Z
11
+ date: 2016-11-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pg
@@ -207,7 +207,7 @@ files:
207
207
  - test/home/data/20141002-1355_02.txt
208
208
  - test/home/data/test.txt
209
209
  - test/home/jobnet-test.rb
210
- - test/home/sqltest.rb
210
+ - test/home/revert.sh
211
211
  - test/home/subsys/d.ct
212
212
  - test/home/subsys/insert.sql.job
213
213
  - test/home/subsys/job1.job
data/test/home/sqltest.rb DELETED
@@ -1,16 +0,0 @@
1
- require 'bricolage/commandlineapplication'
2
- require 'pp'
3
-
4
- app = Bricolage::CommandLineApplication.define {|opts|
5
- opts.data_source_option('--ds', 'Target data source', short: '-D', kind: 'sql')
6
- }
7
- ds = app.data_source('--ds')
8
- ds.open {|conn|
9
- #task_ids = conn.query_values('select task_id from strload_tasks')
10
- #p [task_ids.class, task_ids.size]
11
-
12
- conn.update('create table t (x int)')
13
- pp conn.query_value('select count(*) from t')
14
- conn.update('drop table t')
15
- pp conn.query_value('select count(*) from t')
16
- }