bricolage 5.19.0 → 5.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4e09f9be942c6431360ac3ec8afbd9cae5c03b06
4
- data.tar.gz: b9c2dc9c5a91b42b0e60fb43c089b12f58670128
3
+ metadata.gz: 08ee4bdc2cc0df34636a9d370bd8dcc658f1fa5a
4
+ data.tar.gz: a26fcb9a3150e12572bacc4bd98168ebab68888f
5
5
  SHA512:
6
- metadata.gz: be674ea50692704964d7360c35092e116078b5fb03ac0d2e2f620eb9abb083ffbc9a335d384e4f1c5a68594da4ff00e528097f707da6e03a2925e20a015fe7b5
7
- data.tar.gz: 69241018dcfa1862b327bcc9d43404890b91bc38bcd57836b5a34187507318a3ba9907de33534fef15c606bc01a08e5f4cd6d70ba2c0e0215d9614f5005af7fb
6
+ metadata.gz: 5915c56686b6757f8a059aa3d7bd05ae14ff0d2b8d806cca59392698fcd1ffcdf8e574a3b1204208c9b4f7dbefe92a8984165920b4a400b1c282950e3cc7c649
7
+ data.tar.gz: 30c0701ff39c2e15c2723a0fb4aac188a655c0ceea04c73df7c3097594367e316a563b51a62958cd3e3747f7b0b2ac38caaa7dba0081baac7e3751f4337944c5
@@ -18,6 +18,8 @@ class StreamingLoadJobClass < RubyJobClass
18
18
  optional: true, default: Bricolage::PSQLLoadOptions.new,
19
19
  value_handler: lambda {|value, ctx, vars| Bricolage::PSQLLoadOptions.parse(value) })
20
20
  params.add Bricolage::DataSourceParam.new('s3', 's3-ds', 'S3 data source.')
21
+ params.add Bricolage::StringParam.new('ctl-prefix', 'S3_PREFIX', 'S3 object key prefix for control files. (default: ${queue-path}/ctl)', optional: true)
22
+ params.add Bricolage::OptionalBoolParam.new('keep-ctl', 'Does not delete control files if true.')
21
23
  params.add Bricolage::StringParam.new('queue-path', 'S3_PATH', 'S3 path for data file queue.')
22
24
  params.add Bricolage::StringParam.new('persistent-path', 'S3_PATH', 'S3 path for persistent data file store.')
23
25
  params.add Bricolage::StringParam.new('file-name', 'PATTERN', 'name pattern of target data file.')
@@ -61,6 +63,7 @@ class StreamingLoadJobClass < RubyJobClass
61
63
  RedshiftStreamingLoader.new(
62
64
  data_source: ds,
63
65
  queue: make_s3_queue(params),
66
+ keep_ctl: params['keep-ctl'],
64
67
  table: string(params['dest-table']),
65
68
  work_table: string(params['work-table']),
66
69
  log_table: string(params['log-table']),
@@ -76,6 +79,7 @@ class StreamingLoadJobClass < RubyJobClass
76
79
  ds = params['s3-ds']
77
80
  S3Queue.new(
78
81
  data_source: ds,
82
+ ctl_prefix: (params['ctl-prefix'] || "#{params['queue-path']}/ctl"),
79
83
  queue_path: params['queue-path'],
80
84
  persistent_path: params['persistent-path'],
81
85
  file_name: params['file-name'],
@@ -88,12 +92,13 @@ class StreamingLoadJobClass < RubyJobClass
88
92
  end
89
93
 
90
94
  class RedshiftStreamingLoader
91
- def initialize(data_source:, queue:,
95
+ def initialize(data_source:, queue:, keep_ctl:,
92
96
  table:, work_table: nil, log_table: nil, load_options: nil,
93
97
  sql: nil,
94
98
  logger:, noop: false, load_only: false)
95
99
  @ds = data_source
96
100
  @src = queue
101
+ @keep_ctl = keep_ctl
97
102
  @table = table
98
103
  @work_table = work_table
99
104
  @log_table = log_table
@@ -200,13 +205,17 @@ class StreamingLoadJobClass < RubyJobClass
200
205
  end
201
206
 
202
207
  def create_manifest_file(objects)
203
- manifest_name = "manifest-#{@job_process_id}.json"
208
+ manifest_name = if @keep_ctl
209
+ "#{Time.now.strftime('%Y/%m/%d')}/#{@job_process_id}-#{@table}.json"
210
+ else
211
+ "manifest-#{@job_process_id}.json"
212
+ end
204
213
  @logger.info "creating manifest: #{manifest_name}"
205
214
  json = make_manifest_json(objects)
206
215
  @logger.info "manifest:\n" + json
207
216
  url = @src.put_control_file(manifest_name, json, noop: @noop)
208
217
  yield url
209
- @src.remove_control_file(File.basename(url), noop: @noop)
218
+ @src.remove_control_file(File.basename(url), noop: @noop) unless @keep_ctl
210
219
  end
211
220
 
212
221
  def make_manifest_json(objects)
@@ -359,8 +368,9 @@ class StreamingLoadJobClass < RubyJobClass
359
368
  class S3Queue
360
369
  extend Forwardable
361
370
 
362
- def initialize(data_source:, queue_path:, persistent_path:, file_name:, logger:)
371
+ def initialize(data_source:, ctl_prefix:, queue_path:, persistent_path:, file_name:, logger:)
363
372
  @ds = data_source
373
+ @ctl_prefix = ctl_prefix
364
374
  @queue_path = queue_path
365
375
  @persistent_path = persistent_path
366
376
  @file_name = file_name
@@ -399,7 +409,8 @@ class StreamingLoadJobClass < RubyJobClass
399
409
  end
400
410
 
401
411
  def control_file_path(name)
402
- "#{queue_path}/ctl/#{name}"
412
+ prefix = @ctl_prefix || "#{queue_path}/ctl"
413
+ "#{prefix}/#{name}"
403
414
  end
404
415
 
405
416
  def each(&block)
@@ -183,7 +183,9 @@ module Bricolage
183
183
  write_concurrency: options['write_concurrency'],
184
184
  rotation_size: options['rotation_size'],
185
185
  delete_objects: options['delete_objects'],
186
- object_key_delimiter: options['object_key_delimiter'])
186
+ object_key_delimiter: options['object_key_delimiter'],
187
+ src_zone_offset: options['src_zone_offset'],
188
+ dst_zone_offset: options['dst_zone_offset'])
187
189
  end
188
190
 
189
191
  class S3Export < Action
@@ -195,7 +197,9 @@ module Bricolage
195
197
  write_concurrency: 4,
196
198
  rotation_size: nil,
197
199
  delete_objects: false,
198
- object_key_delimiter: nil)
200
+ object_key_delimiter: nil,
201
+ src_zone_offset: nil,
202
+ dst_zone_offset: nil)
199
203
  @table = table
200
204
  @statement = stmt
201
205
  @s3ds = s3ds
@@ -208,6 +212,8 @@ module Bricolage
208
212
  @rotation_size = rotation_size
209
213
  @delete_objects = delete_objects
210
214
  @object_key_delimiter = object_key_delimiter
215
+ @src_zone_offset = src_zone_offset
216
+ @dst_zone_offset = dst_zone_offset
211
217
  end
212
218
 
213
219
  def run
@@ -250,6 +256,12 @@ module Bricolage
250
256
  params[:r] = @rotation_size if @rotation_size
251
257
  params[:d] = nil if @delete_objects
252
258
  params[:k] = @object_key_delimiter if @object_key_delimiter
259
+ if src_zone_offset = @src_zone_offset || ds.mysql_options[:src_zone_offset]
260
+ params[:S] = src_zone_offset
261
+ end
262
+ if dst_zone_offset = @dst_zone_offset || ds.mysql_options[:dst_zone_offset]
263
+ params[:T] = dst_zone_offset
264
+ end
253
265
  params
254
266
  end
255
267
 
@@ -1,4 +1,4 @@
1
1
  module Bricolage
2
2
  APPLICATION_NAME = 'Bricolage'
3
- VERSION = '5.19.0'
3
+ VERSION = '5.19.1'
4
4
  end
data/libexec/mys3dump.jar CHANGED
Binary file
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: ../..
3
3
  specs:
4
- bricolage (5.19.0)
4
+ bricolage (5.19.1)
5
5
  aws-sdk (~> 2)
6
6
  mysql2
7
7
  pg (~> 0.18.0)
@@ -11,12 +11,12 @@ PATH
11
11
  GEM
12
12
  remote: https://rubygems.org/
13
13
  specs:
14
- aws-sdk (2.6.7)
15
- aws-sdk-resources (= 2.6.7)
16
- aws-sdk-core (2.6.7)
14
+ aws-sdk (2.6.14)
15
+ aws-sdk-resources (= 2.6.14)
16
+ aws-sdk-core (2.6.14)
17
17
  jmespath (~> 1.0)
18
- aws-sdk-resources (2.6.7)
19
- aws-sdk-core (= 2.6.7)
18
+ aws-sdk-resources (2.6.14)
19
+ aws-sdk-core (= 2.6.14)
20
20
  coderay (1.1.0)
21
21
  fluent-logger (0.5.1)
22
22
  msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
@@ -47,10 +47,10 @@ GEM
47
47
  td-logger (~> 0.3.21)
48
48
  yajl-ruby (~> 1.1)
49
49
  zip-zip (~> 0.3)
50
- td-client (0.8.83)
51
- httpclient (~> 2.7)
50
+ td-client (0.8.84)
51
+ httpclient (>= 2.7)
52
52
  json (>= 1.7.6)
53
- msgpack (>= 0.4.4, < 0.8.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
53
+ msgpack (>= 0.5.6, < 2)
54
54
  td-logger (0.3.25)
55
55
  fluent-logger (~> 0.5.0)
56
56
  msgpack (>= 0.4.4, < 0.8.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
@@ -67,4 +67,4 @@ DEPENDENCIES
67
67
  pry
68
68
 
69
69
  BUNDLED WITH
70
- 1.13.2
70
+ 1.13.6
@@ -42,16 +42,6 @@ postgres:
42
42
  encoding: utf8
43
43
  sql_log_level: DEBUG
44
44
 
45
- td:
46
- type: td
47
- database: logs
48
- apikey: <%= password 'td_tabemiru' %>
49
-
50
- td_search_log:
51
- type: td
52
- database: search_log
53
- apikey: <%= password 'td_tabemiru' %>
54
-
55
45
  s3:
56
46
  type: s3
57
47
  endpoint: "s3-ap-northeast-1.amazonaws.com"
@@ -74,27 +64,35 @@ mysql:
74
64
  password: <%= password 'mysql_shared_work_readonly' %>
75
65
  encoding: utf8
76
66
 
77
- s3_strload:
78
- type: s3
79
- region: "ap-northeast-1"
80
- endpoint: "s3-ap-northeast-1.amazonaws.com"
81
- bucket: tabemiru-data.ap-northeast-1
82
- prefix: tmp/strload
83
- access_key_id: "<%= password 'aws_access_key_id' %>"
84
- secret_access_key: "<%= password 'aws_secret_access_key' %>"
67
+ sqs_preproc:
68
+ type: sqs
69
+ url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/bricolage-preproc-dev"
70
+ max_number_of_messages: 10
71
+ visibility_timeout: 60
85
72
 
86
- sqs_strload_event:
73
+ sqs_dispatch:
87
74
  type: sqs
88
75
  url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/log-stream-dev"
89
76
  max_number_of_messages: 10
90
77
  visibility_timeout: 60
91
- access_key_id: "<%= password 'aws_access_key_id' %>"
92
- secret_access_key: "<%= password 'aws_secret_access_key' %>"
93
78
 
94
- sqs_strload_task:
79
+ sqs_task:
95
80
  type: sqs
96
81
  url: "https://sqs.ap-northeast-1.amazonaws.com/789035092620/bricolage-load-tasks-dev"
97
82
  max_number_of_messages: 1
98
83
  visibility_timeout: 180
99
- access_key_id: "<%= password 'aws_access_key_id' %>"
100
- secret_access_key: "<%= password 'aws_secret_access_key' %>"
84
+
85
+ sns:
86
+ type: sns
87
+ region: "ap-northeast-1"
88
+ topic_arn: "arn:aws:sns:ap-northeast-1:789035092620:dwh-service-notification"
89
+
90
+ td:
91
+ type: td
92
+ database: logs
93
+ apikey: <%= password 'td_tabemiru' %>
94
+
95
+ td_search_log:
96
+ type: td
97
+ database: search_log
98
+ apikey: <%= password 'td_tabemiru' %>
@@ -0,0 +1,7 @@
1
+ #!/bin/sh
2
+
3
+ for i in data/2*.txt
4
+ do
5
+ aws s3 cp $i s3://tabemiru-data.ap-northeast-1/dev/queue/
6
+ done
7
+ aws s3 rm --recursive s3://tabemiru-data.ap-northeast-1/dev/save/
@@ -1,12 +1,14 @@
1
1
  class: streaming_load
2
2
 
3
3
  s3-ds: s3
4
+ ctl-prefix: ctl
5
+ keep-ctl: true
4
6
  queue-path: queue
5
- persistent-path: save/year=%Y/month=%m/day=%d/hour=%H
7
+ persistent-path: save/%Y/%m/%d
6
8
  file-name: "%Y%m%d-%H%M_%Q.txt"
7
9
 
8
10
  redshift-ds: sql
9
- dest-table: load_test
10
- work-table: load_test_wk
11
- log-table: load_test_l
11
+ dest-table: $test_schema.load_test
12
+ work-table: $test_schema.load_test_wk
13
+ log-table: $test_schema.load_test_l
12
14
  load-options: "delimiter '\\t'"
@@ -6,10 +6,8 @@ s3-ds: s3
6
6
  s3-prefix: shimpeko/test-abc-
7
7
  gzip: true
8
8
  dump-options:
9
- partition_column: id
10
- partition_number: 8
11
- write_concurrency: 16
12
- rotation_size: 16000000
9
+ src_zone_offset: "+00:00"
10
+ dst_zone_offset: "+09:00"
13
11
  delete_objects: true
14
12
  dest-ds: sql
15
13
  dest-table: $test_schema.users
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bricolage
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.19.0
4
+ version: 5.19.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Minero Aoki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-17 00:00:00.000000000 Z
11
+ date: 2016-11-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pg
@@ -207,7 +207,7 @@ files:
207
207
  - test/home/data/20141002-1355_02.txt
208
208
  - test/home/data/test.txt
209
209
  - test/home/jobnet-test.rb
210
- - test/home/sqltest.rb
210
+ - test/home/revert.sh
211
211
  - test/home/subsys/d.ct
212
212
  - test/home/subsys/insert.sql.job
213
213
  - test/home/subsys/job1.job
data/test/home/sqltest.rb DELETED
@@ -1,16 +0,0 @@
1
- require 'bricolage/commandlineapplication'
2
- require 'pp'
3
-
4
- app = Bricolage::CommandLineApplication.define {|opts|
5
- opts.data_source_option('--ds', 'Target data source', short: '-D', kind: 'sql')
6
- }
7
- ds = app.data_source('--ds')
8
- ds.open {|conn|
9
- #task_ids = conn.query_values('select task_id from strload_tasks')
10
- #p [task_ids.class, task_ids.size]
11
-
12
- conn.update('create table t (x int)')
13
- pp conn.query_value('select count(*) from t')
14
- conn.update('drop table t')
15
- pp conn.query_value('select count(*) from t')
16
- }