bricolage 5.12.5 → 5.13.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 741eaa97792bc5c5c755c6cd4e8c4f1545a8be6d
-  data.tar.gz: 0d69df37b2769c8c08e9ba5da41aeca3a9a9e5e3
+  metadata.gz: 8e56d9299cba57f0e086ad58140a5694aaa4d114
+  data.tar.gz: 63d3933d15359f340f9e7a7ffa0c50bfc39404b0
 SHA512:
-  metadata.gz: 8c956ddbfa6594810ced49c67c9cd5de6e0fe9ce514c50194e904802d2a8b2f5419dce600f160ccf99fcb52d2c0aeeb4b1515f761dc46c388957b177a6590abc
-  data.tar.gz: 2c0a615f183a1774abdebe80ec2c9d81595a02844f8b94584939f3268854fe421bc7da7c8968ce0acb03f0163e8ab63ca7fe0091d1b715497dfbbef58cceedd2
+  metadata.gz: 2a7cf7446da8ac747cba7cb41f56bb593a453bec20a57b6d785daca08314f27161a5a1db7ed9231b432bdedd417a8a9c06f9b6e49de5249371b2c840684d9d8f
+  data.tar.gz: 5d86674545cb5b8953e53c465bd5a5c12ab077a0e676c240967d0bcfe668418125c2f2c465622d2498bb6e7ce87109a406094b8a3d093e32fb1e5dfb3ed692cf
@@ -3,6 +3,7 @@ require 'bricolage/psqldatasource'
 require 'bricolage/exception'
 require 'json'
 require 'socket'
+require 'forwardable'
 
 class StreamingLoadJobClass < RubyJobClass
   job_class_id 'streaming_load'
@@ -53,13 +54,15 @@ class StreamingLoadJobClass < RubyJobClass
 
   def make_loader(params)
     ds = params['redshift-ds']
+    load_opts = params['load-options']
+    load_opts.provide_defaults(params['s3-ds'])
     RedshiftStreamingLoader.new(
       data_source: ds,
       queue: make_s3_queue(params),
       table: string(params['dest-table']),
       work_table: string(params['work-table']),
       log_table: string(params['log-table']),
-      load_options: params['load-options'],
+      load_options: load_opts,
       sql: params['sql-file'],
       logger: ds.logger,
       noop: params['noop'],
@@ -306,6 +309,8 @@ class StreamingLoadJobClass < RubyJobClass
   end
 
   class S3Queue
+    extend Forwardable
+
     def initialize(data_source:, queue_path:, persistent_path:, file_name:, logger:)
       @ds = data_source
       @queue_path = queue_path
@@ -318,13 +323,15 @@ class StreamingLoadJobClass < RubyJobClass
       @ds.credential_string
     end
 
+    def_delegator '@ds', :encryption
+
     attr_reader :queue_path
 
     def queue_url
       @ds.url(@queue_path)
     end
 
-    def object_url(key)
+    def object_url_direct(key)
       @ds.url(key, no_prefix: true)
     end
 
@@ -334,7 +341,7 @@ class StreamingLoadJobClass < RubyJobClass
 
     def put_control_file(name, data, noop: false)
       @logger.info "s3 put: #{control_file_url(name)}"
-      @ds.object(control_file_path(name)).write(data) unless noop
+      @ds.object(control_file_path(name)).put(body: data) unless noop
       control_file_url(name)
     end
 
@@ -344,39 +351,27 @@ class StreamingLoadJobClass < RubyJobClass
     end
 
     def control_file_path(name)
-      "#{queue_path}/#{name}"
-    end
-
-    def consume_each(noop: false, &block)
-      each do |obj|
-        yield obj and obj.save(noop: noop)
-      end
+      "#{queue_path}/ctl/#{name}"
     end
 
     def each(&block)
       queued_objects.each(&block)
     end
 
-    def queue_directory
-      @ds.objects_with_prefix(queue_path)
-    end
-
-    def queued_file_nodes
-      queue_directory.as_tree.children.select {|node|
-        node.leaf? and
-        node.key[-1, 1] != '/' and
-        target_file_name?(File.basename(node.key))
-      }
-    end
-
     def queued_objects
-      queued_file_nodes.map {|node| LoadableObject.new(self, node, @logger) }
+      @ds.traverse(queue_path)
+        .select {|obj| target_file_name?(File.basename(obj.key)) }
+        .map {|obj| LoadableObject.new(self, obj, @logger) }
     end
 
     def target_file_name?(name)
       file_name_pattern =~ name
     end
 
+    def persistent_object(name)
+      @ds.object(persistent_path(name), no_prefix: true)
+    end
+
     def persistent_path(name)
       @ds.path("#{format_path(@persistent_path, name)}/#{name}")
     end
@@ -425,9 +420,9 @@ class StreamingLoadJobClass < RubyJobClass
   end
 
   class LoadableObject
-    def initialize(s3queue, node, logger)
+    def initialize(s3queue, object, logger)
       @s3queue = s3queue
-      @node = node
+      @object = object
       @logger = logger
     end
 
@@ -436,7 +431,7 @@ class StreamingLoadJobClass < RubyJobClass
     end
 
     def path
-      @node.key
+      @object.key
    end
 
    def basename
@@ -444,20 +439,29 @@ class StreamingLoadJobClass < RubyJobClass
     end
 
     def url
-      @s3queue.object_url(path)
+      @s3queue.object_url_direct(path)
     end
 
-    def save(noop = false)
-      @logger.info "s3 move: #{path} -> #{save_path}"
+    def dequeue(noop = false)
+      @logger.info "s3 move: #{path} -> #{persistent_path}"
       return if noop
-      @node.object.move_to save_path
+      @object.move_to persistent_object, dequeue_options
       @logger.info "file saved"
     end
 
-    alias dequeue save
+    def persistent_object
+      @s3queue.persistent_object(basename)
+    end
 
-    def save_path
+    def persistent_path
       @s3queue.persistent_path(basename)
     end
+
+    def dequeue_options
+      opts = {
+        server_side_encryption: @s3queue.encryption
+      }
+      opts.reject {|k,v| v.nil? }
+    end
   end
 end
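Note on the S3Queue / LoadableObject hunks above: extend Forwardable plus def_delegator '@ds', :encryption exposes the data source's encryption setting on the queue without a hand-written wrapper, and the new dequeue_options drops the :server_side_encryption key entirely when it is nil, so unencrypted buckets keep working. A minimal, self-contained sketch of that pattern (the Fake* classes are illustrative stand-ins, not bricolage's own):

require 'forwardable'

# Illustrative stand-ins only -- not bricolage's real classes.
class FakeDataSource
  def initialize(encryption = nil)
    @encryption = encryption
  end

  attr_reader :encryption
end

class FakeQueue
  extend Forwardable

  def initialize(ds)
    @ds = ds
  end

  # Forwards FakeQueue#encryption to @ds.encryption, as the diff does for S3Queue.
  def_delegator '@ds', :encryption

  # Mirrors dequeue_options above: omit :server_side_encryption when it is nil.
  def dequeue_options
    { server_side_encryption: encryption }.reject {|_k, v| v.nil? }
  end
end

p FakeQueue.new(FakeDataSource.new('aws:kms')).dequeue_options   # => {:server_side_encryption=>"aws:kms"}
p FakeQueue.new(FakeDataSource.new).dequeue_options              # => {}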
@@ -322,7 +322,7 @@ module Bricolage
       unless src_ds.redshift_loader_source?
         raise ParameterError, "input data source does not support redshift as bulk loading source: #{src_ds.name}"
       end
-      provide_default_load_options opts, src_ds
+      opts.provide_defaults(src_ds)
       buf = StringIO.new
       buf.puts "copy #{dest_table}"
       buf.puts "from '#{src_ds.url(src_path)}'"
@@ -335,12 +335,6 @@ module Bricolage
       buf.string
     end
 
-    def provide_default_load_options(opts, src_ds)
-      if src_ds.encrypted? and not opts.key?('encrypted')
-        opts['encrypted'] = true
-      end
-    end
-
     def format_option(fmt, src_ds, jsonpath)
       case fmt
       when 'tsv'
@@ -477,6 +471,12 @@ module Bricolage
       buf.string
     end
 
+    def provide_defaults(src_ds)
+      if src_ds.encrypted? and not key?('encrypted')
+        self['encrypted'] = true
+      end
+    end
+
     class Option
       def initialize(name, value)
         @name = value
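The hunks above move the encryption default out of the free helper provide_default_load_options and onto the load-options object itself, so the COPY builder only calls opts.provide_defaults(src_ds). A rough sketch of the idea, assuming a hash-like options object (OptionsSketch and SourceSketch are stand-ins, not the gem's real classes):

# Rough sketch with hash-like stand-ins; not bricolage's PSQLLoadOptions.
class OptionsSketch
  def initialize(opts = {})
    @opts = opts
  end

  def key?(name)
    @opts.key?(name)
  end

  def [](name)
    @opts[name]
  end

  def []=(name, value)
    @opts[name] = value
  end

  # Same rule the diff moves onto the options object: default 'encrypted'
  # to true when the source is encrypted and the caller has not set it.
  def provide_defaults(src_ds)
    self['encrypted'] = true if src_ds.encrypted? && !key?('encrypted')
  end
end

SourceSketch = Struct.new(:encrypted) do
  def encrypted?
    !!encrypted
  end
end

opts = OptionsSketch.new
opts.provide_defaults(SourceSketch.new(true))
p opts['encrypted']   # => true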
@@ -8,22 +8,28 @@ module Bricolage
   class S3DataSource < DataSource
     declare_type 's3'
 
-    def initialize(endpoint: 's3-ap-northeast-1.amazonaws.com',
+    def initialize(
+        endpoint: 's3-ap-northeast-1.amazonaws.com',
+        region: 'ap-northeast-1',
         bucket: nil, prefix: nil,
         access_key_id: nil, secret_access_key: nil, master_symmetric_key: nil,
+        encryption: nil,
         s3cfg: nil)
-      @endpoint = endpoint
-      @bucket = bucket
+      @endpoint = (/\Ahttps?:/ =~ endpoint) ? endpoint : "https://#{endpoint}"
+      @region = region
+      @bucket_name = bucket
       @prefix = (prefix && prefix.empty?) ? nil : prefix
       @access_key_id = access_key_id
       @secret_access_key = secret_access_key
       @master_symmetric_key = master_symmetric_key
+      @encryption = encryption
       @s3cfg = s3cfg
       @configurations = @s3cfg ? load_configurations(@s3cfg) : nil
     end
 
     attr_reader :endpoint
-    attr_reader :bucket
+    attr_reader :region
+    attr_reader :bucket_name
     attr_reader :prefix
 
     def new_task
@@ -64,8 +70,10 @@ module Bricolage
       h
     end
 
+    attr_reader :encryption
+
     def encrypted?
-      !!@master_symmetric_key
+      !!(@master_symmetric_key or @encryption)
     end
 
     #
@@ -73,29 +81,30 @@ module Bricolage
     #
 
     def client
-      @client ||= AWS::S3.new(s3_endpoint: endpoint, access_key_id: access_key, secret_access_key: secret_key)
-    end
-
-    def objects
-      client.buckets[bucket].objects
+      @client ||= Aws::S3::Client.new(region: @region, endpoint: @endpoint, access_key_id: access_key, secret_access_key: secret_key)
     end
 
-    def objects_with_prefix(rel, no_prefix: false)
-      objects.with_prefix(path(rel, no_prefix: no_prefix))
+    def bucket
+      @resource ||= Aws::S3::Resource.new(client: client)
+      @bucket ||= @resource.bucket(@bucket_name)
     end
 
     def object(rel, no_prefix: false)
-      objects[path(rel, no_prefix: no_prefix)]
+      bucket.object(path(rel, no_prefix: no_prefix))
     end
 
     def url(rel, no_prefix: false)
-      "s3://#{@bucket}/#{path(rel, no_prefix: no_prefix)}"
+      "s3://#{@bucket_name}/#{path(rel, no_prefix: no_prefix)}"
     end
 
     def path(rel, no_prefix: false)
-      path = (no_prefix || !prefix) ? rel.to_s : "#{@prefix}/#{rel}"
+      path = (no_prefix || !@prefix) ? rel.to_s : "#{@prefix}/#{rel}"
       path.sub(%r<\A/>, '').gsub(%r<//>, '/')
     end
+
+    def traverse(rel, no_prefix: false)
+      bucket.objects(prefix: path(rel, no_prefix: no_prefix))
+    end
   end
 
   class S3Task < DataSourceTask
@@ -143,7 +152,7 @@ module Bricolage
       raise JobFailure, "no such file: #{@src}" if source_files.empty?
       each_src_dest do |src, dest|
         ds.logger.info command_line(src, dest)
-        ds.object(dest).write(file: src)
+        ds.object(dest).upload_file(src)
       end
       nil
     end
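These hunks carry the aws-sdk v1 to v2 migration: AWS::S3 becomes an Aws::S3::Client plus an Aws::S3::Resource, objects.with_prefix becomes bucket.objects(prefix: ...), and object.write(data) / object.write(file: ...) become object.put(body: ...) / object.upload_file(...). A standalone sketch of those v2 calls as used above (bucket name and keys are placeholders, not values from this gem):

require 'aws-sdk'   # aws-sdk ~> 2, as pinned by this release

# Placeholders; real values come from the data source entry in the config.
client   = Aws::S3::Client.new(region: 'ap-northeast-1')
resource = Aws::S3::Resource.new(client: client)
bucket   = resource.bucket('example-bucket')

# object(...).put(body: ...) replaces v1's object.write(data)
bucket.object('dev/queue/ctl/load.json').put(body: '{"files": []}')

# object(...).upload_file(path) replaces v1's object.write(file: path)
bucket.object('dev/queue/20141002-1355_00.txt').upload_file('data/20141002-1355_00.txt')

# bucket.objects(prefix: ...) replaces v1's objects.with_prefix(...) and yields
# ObjectSummary items, which is what the new traverse method returns.
bucket.objects(prefix: 'dev/queue/').each do |obj|
  puts obj.key
end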
@@ -1,4 +1,4 @@
 module Bricolage
   APPLICATION_NAME = 'Bricolage'
-  VERSION = '5.12.5'
+  VERSION = '5.13.0'
 end
@@ -1,2 +1,3 @@
 source 'https://rubygems.org/'
 gem 'bricolage', path: '../..'
+gem 'pry'
@@ -1,8 +1,8 @@
 PATH
   remote: ../..
   specs:
-    bricolage (5.12.5)
-      aws-sdk (< 2)
+    bricolage (5.13.0)
+      aws-sdk (~> 2)
       mysql2
       pg
       td
@@ -10,35 +10,43 @@ PATH
 GEM
   remote: https://rubygems.org/
   specs:
-    aws-sdk (1.66.0)
-      aws-sdk-v1 (= 1.66.0)
-    aws-sdk-v1 (1.66.0)
-      json (~> 1.4)
-      nokogiri (>= 1.4.4)
+    aws-sdk (2.2.6)
+      aws-sdk-resources (= 2.2.6)
+    aws-sdk-core (2.2.6)
+      jmespath (~> 1.0)
+    aws-sdk-resources (2.2.6)
+      aws-sdk-core (= 2.2.6)
+    coderay (1.1.0)
     fluent-logger (0.4.10)
       msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
       yajl-ruby (~> 1.0)
     hirb (0.7.3)
     httpclient (2.5.3.3)
+    jmespath (1.1.3)
     json (1.8.3)
-    mini_portile (0.6.2)
+    method_source (0.8.2)
     msgpack (0.5.11)
-    mysql2 (0.4.1)
-    nokogiri (1.6.6.2)
-      mini_portile (~> 0.6.0)
+    mysql2 (0.4.2)
     parallel (0.6.5)
-    pg (0.18.3)
+    pg (0.18.4)
+    pry (0.10.3)
+      coderay (~> 1.1.0)
+      method_source (~> 0.8.1)
+      slop (~> 3.4)
+    ruby-progressbar (1.7.5)
     rubyzip (1.1.7)
-    td (0.12.0)
+    slop (3.6.0)
+    td (0.13.0)
       hirb (>= 0.4.5)
       msgpack (>= 0.4.4, < 0.5.12, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
       parallel (~> 0.6.1)
+      ruby-progressbar (~> 1.7.5)
       rubyzip (~> 1.1.7)
-      td-client (~> 0.8.75)
+      td-client (~> 0.8.76)
       td-logger (~> 0.3.21)
       yajl-ruby (~> 1.1)
       zip-zip (~> 0.3)
-    td-client (0.8.75)
+    td-client (0.8.76)
       httpclient (>= 2.5.2, < 2.6.0)
       json (>= 1.7.6)
       msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
@@ -55,6 +63,7 @@ PLATFORMS
 
 DEPENDENCIES
   bricolage!
+  pry
 
 BUNDLED WITH
    1.10.6
@@ -45,6 +45,7 @@ td_search_log:
 s3:
   type: s3
   endpoint: "s3-ap-northeast-1.amazonaws.com"
+  region: "ap-northeast-1"
   bucket: tabemiru-data.ap-northeast-1
   prefix: "/dev"
   s3cfg: <%= user_home_relative_path '.s3cfg' %>
@@ -0,0 +1 @@
+test
@@ -8,5 +8,5 @@ in \
 do
   aws s3 cp \
     $(dirname $0)/data/$name \
-    $S3_TABEMIRU/dev/queue/$name
+    $S3_DEV/dev/queue/$name
 done
@@ -7,6 +7,6 @@ in \
   20141002-1355_02.txt
 do
   aws s3 mv \
-    s3://tabemiru-data.ap-northeast-1/tmp/save/year=2014/month=10/day=02/hour=13/$name \
-    s3://tabemiru-data.ap-northeast-1/tmp/queue/$name
+    $S3_DEV/dev/save/year=2014/month=10/day=02/hour=13/$name \
+    $S3_DEV/dev/queue/$name
 done
@@ -0,0 +1,3 @@
+class: s3-put
+dest-file: test-dest.txt
+src-file: $bricolage_home/data/test.txt
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: bricolage
 version: !ruby/object:Gem::Version
-  version: 5.12.5
+  version: 5.13.0
 platform: ruby
 authors:
 - Minero Aoki
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-11-27 00:00:00.000000000 Z
+date: 2015-12-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: pg
@@ -28,14 +28,14 @@ dependencies:
   name: aws-sdk
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "<"
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '2'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "<"
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '2'
 - !ruby/object:Gem::Dependency
@@ -169,6 +169,7 @@ files:
 - test/home/data/20141002-1355_00.txt
 - test/home/data/20141002-1355_01.txt
 - test/home/data/20141002-1355_02.txt
+- test/home/data/test.txt
 - test/home/jobnet-test.rb
 - test/home/put.sh
 - test/home/revert.sh
@@ -187,6 +188,7 @@ files:
 - test/home/subsys/migrate.job
 - test/home/subsys/net1.jobnet
 - test/home/subsys/net2.jobnet
+- test/home/subsys/put.job
 - test/home/subsys/raw-vacuum.jobnet
 - test/home/subsys/raw-vacuum.sql.job
 - test/home/subsys/rebuild.sql.job