bricolage 5.12.5 → 5.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 741eaa97792bc5c5c755c6cd4e8c4f1545a8be6d
-  data.tar.gz: 0d69df37b2769c8c08e9ba5da41aeca3a9a9e5e3
+  metadata.gz: 8e56d9299cba57f0e086ad58140a5694aaa4d114
+  data.tar.gz: 63d3933d15359f340f9e7a7ffa0c50bfc39404b0
 SHA512:
-  metadata.gz: 8c956ddbfa6594810ced49c67c9cd5de6e0fe9ce514c50194e904802d2a8b2f5419dce600f160ccf99fcb52d2c0aeeb4b1515f761dc46c388957b177a6590abc
-  data.tar.gz: 2c0a615f183a1774abdebe80ec2c9d81595a02844f8b94584939f3268854fe421bc7da7c8968ce0acb03f0163e8ab63ca7fe0091d1b715497dfbbef58cceedd2
+  metadata.gz: 2a7cf7446da8ac747cba7cb41f56bb593a453bec20a57b6d785daca08314f27161a5a1db7ed9231b432bdedd417a8a9c06f9b6e49de5249371b2c840684d9d8f
+  data.tar.gz: 5d86674545cb5b8953e53c465bd5a5c12ab077a0e676c240967d0bcfe668418125c2f2c465622d2498bb6e7ce87109a406094b8a3d093e32fb1e5dfb3ed692cf
@@ -3,6 +3,7 @@ require 'bricolage/psqldatasource'
 require 'bricolage/exception'
 require 'json'
 require 'socket'
+require 'forwardable'

 class StreamingLoadJobClass < RubyJobClass
   job_class_id 'streaming_load'
@@ -53,13 +54,15 @@ class StreamingLoadJobClass < RubyJobClass

   def make_loader(params)
     ds = params['redshift-ds']
+    load_opts = params['load-options']
+    load_opts.provide_defaults(params['s3-ds'])
     RedshiftStreamingLoader.new(
       data_source: ds,
       queue: make_s3_queue(params),
       table: string(params['dest-table']),
       work_table: string(params['work-table']),
       log_table: string(params['log-table']),
-      load_options: params['load-options'],
+      load_options: load_opts,
       sql: params['sql-file'],
       logger: ds.logger,
       noop: params['noop'],
@@ -306,6 +309,8 @@ class StreamingLoadJobClass < RubyJobClass
   end

   class S3Queue
+    extend Forwardable
+
     def initialize(data_source:, queue_path:, persistent_path:, file_name:, logger:)
       @ds = data_source
       @queue_path = queue_path
@@ -318,13 +323,15 @@ class StreamingLoadJobClass < RubyJobClass
       @ds.credential_string
     end

+    def_delegator '@ds', :encryption
+
     attr_reader :queue_path

     def queue_url
       @ds.url(@queue_path)
     end

-    def object_url(key)
+    def object_url_direct(key)
       @ds.url(key, no_prefix: true)
     end

@@ -334,7 +341,7 @@ class StreamingLoadJobClass < RubyJobClass

     def put_control_file(name, data, noop: false)
       @logger.info "s3 put: #{control_file_url(name)}"
-      @ds.object(control_file_path(name)).write(data) unless noop
+      @ds.object(control_file_path(name)).put(body: data) unless noop
       control_file_url(name)
     end

@@ -344,39 +351,27 @@ class StreamingLoadJobClass < RubyJobClass
     end

     def control_file_path(name)
-      "#{queue_path}/#{name}"
-    end
-
-    def consume_each(noop: false, &block)
-      each do |obj|
-        yield obj and obj.save(noop: noop)
-      end
+      "#{queue_path}/ctl/#{name}"
     end

     def each(&block)
       queued_objects.each(&block)
     end

-    def queue_directory
-      @ds.objects_with_prefix(queue_path)
-    end
-
-    def queued_file_nodes
-      queue_directory.as_tree.children.select {|node|
-        node.leaf? and
-        node.key[-1, 1] != '/' and
-        target_file_name?(File.basename(node.key))
-      }
-    end
-
     def queued_objects
-      queued_file_nodes.map {|node| LoadableObject.new(self, node, @logger) }
+      @ds.traverse(queue_path)
+        .select {|obj| target_file_name?(File.basename(obj.key)) }
+        .map {|obj| LoadableObject.new(self, obj, @logger) }
     end

     def target_file_name?(name)
       file_name_pattern =~ name
     end

+    def persistent_object(name)
+      @ds.object(persistent_path(name), no_prefix: true)
+    end
+
     def persistent_path(name)
       @ds.path("#{format_path(@persistent_path, name)}/#{name}")
     end
@@ -425,9 +420,9 @@ class StreamingLoadJobClass < RubyJobClass
   end

   class LoadableObject
-    def initialize(s3queue, node, logger)
+    def initialize(s3queue, object, logger)
       @s3queue = s3queue
-      @node = node
+      @object = object
       @logger = logger
     end

@@ -436,7 +431,7 @@ class StreamingLoadJobClass < RubyJobClass
     end

     def path
-      @node.key
+      @object.key
     end

     def basename
@@ -444,20 +439,29 @@ class StreamingLoadJobClass < RubyJobClass
     end

     def url
-      @s3queue.object_url(path)
+      @s3queue.object_url_direct(path)
     end

-    def save(noop = false)
-      @logger.info "s3 move: #{path} -> #{save_path}"
+    def dequeue(noop = false)
+      @logger.info "s3 move: #{path} -> #{persistent_path}"
       return if noop
-      @node.object.move_to save_path
+      @object.move_to persistent_object, dequeue_options
       @logger.info "file saved"
     end

-    alias dequeue save
+    def persistent_object
+      @s3queue.persistent_object(basename)
+    end

-    def save_path
+    def persistent_path
       @s3queue.persistent_path(basename)
     end
+
+    def dequeue_options
+      opts = {
+        server_side_encryption: @s3queue.encryption
+      }
+      opts.reject {|k,v| v.nil? }
+    end
   end
 end
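
Note: the renamed LoadableObject#dequeue now passes a dequeue_options hash to move_to so the server-side encryption setting survives the S3 move. A minimal standalone sketch of that option-building behavior (the method name here is illustrative, not part of the gem):

    # Build copy options the way dequeue_options does: include
    # :server_side_encryption only when the data source defines one.
    def build_dequeue_options(encryption)
      opts = { server_side_encryption: encryption }
      opts.reject {|_k, v| v.nil? }
    end

    build_dequeue_options('aws:kms')  #=> {:server_side_encryption=>"aws:kms"}
    build_dequeue_options(nil)        #=> {}
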
@@ -322,7 +322,7 @@ module Bricolage
      unless src_ds.redshift_loader_source?
        raise ParameterError, "input data source does not support redshift as bulk loading source: #{src_ds.name}"
      end
-      provide_default_load_options opts, src_ds
+      opts.provide_defaults(src_ds)
      buf = StringIO.new
      buf.puts "copy #{dest_table}"
      buf.puts "from '#{src_ds.url(src_path)}'"
@@ -335,12 +335,6 @@ module Bricolage
       buf.string
     end

-    def provide_default_load_options(opts, src_ds)
-      if src_ds.encrypted? and not opts.key?('encrypted')
-        opts['encrypted'] = true
-      end
-    end
-
     def format_option(fmt, src_ds, jsonpath)
       case fmt
       when 'tsv'
@@ -477,6 +471,12 @@ module Bricolage
       buf.string
     end

+    def provide_defaults(src_ds)
+      if src_ds.encrypted? and not key?('encrypted')
+        self['encrypted'] = true
+      end
+    end
+
     class Option
       def initialize(name, value)
         @name = name
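
Note: the 'encrypted' default now lives on the load-options object itself (provide_defaults) instead of the private provide_default_load_options helper shown removed above. A rough sketch of the intended behavior, using stand-in classes for illustration only:

    # Stand-in load-options object: defaults 'encrypted' to true when the
    # source data source is encrypted and the caller did not set it.
    class FakeLoadOptions < Hash
      def provide_defaults(src_ds)
        self['encrypted'] = true if src_ds.encrypted? && !key?('encrypted')
      end
    end

    SrcDs = Struct.new(:encrypted) do
      def encrypted?; encrypted; end
    end

    opts = FakeLoadOptions.new
    opts.provide_defaults(SrcDs.new(true))
    opts  #=> {"encrypted"=>true}
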
@@ -8,22 +8,28 @@ module Bricolage
   class S3DataSource < DataSource
     declare_type 's3'

-    def initialize(endpoint: 's3-ap-northeast-1.amazonaws.com',
+    def initialize(
+        endpoint: 's3-ap-northeast-1.amazonaws.com',
+        region: 'ap-northeast-1',
         bucket: nil, prefix: nil,
         access_key_id: nil, secret_access_key: nil, master_symmetric_key: nil,
+        encryption: nil,
         s3cfg: nil)
-      @endpoint = endpoint
-      @bucket = bucket
+      @endpoint = (/\Ahttps?:/ =~ endpoint) ? endpoint : "https://#{endpoint}"
+      @region = region
+      @bucket_name = bucket
       @prefix = (prefix && prefix.empty?) ? nil : prefix
       @access_key_id = access_key_id
       @secret_access_key = secret_access_key
       @master_symmetric_key = master_symmetric_key
+      @encryption = encryption
       @s3cfg = s3cfg
       @configurations = @s3cfg ? load_configurations(@s3cfg) : nil
     end

     attr_reader :endpoint
-    attr_reader :bucket
+    attr_reader :region
+    attr_reader :bucket_name
     attr_reader :prefix

     def new_task
@@ -64,8 +70,10 @@ module Bricolage
       h
     end

+    attr_reader :encryption
+
     def encrypted?
-      !!@master_symmetric_key
+      !!(@master_symmetric_key or @encryption)
     end

     #
@@ -73,29 +81,30 @@ module Bricolage
     #

     def client
-      @client ||= AWS::S3.new(s3_endpoint: endpoint, access_key_id: access_key, secret_access_key: secret_key)
-    end
-
-    def objects
-      client.buckets[bucket].objects
+      @client ||= Aws::S3::Client.new(region: @region, endpoint: @endpoint, access_key_id: access_key, secret_access_key: secret_key)
     end

-    def objects_with_prefix(rel, no_prefix: false)
-      objects.with_prefix(path(rel, no_prefix: no_prefix))
+    def bucket
+      @resource ||= Aws::S3::Resource.new(client: client)
+      @bucket ||= @resource.bucket(@bucket_name)
     end

     def object(rel, no_prefix: false)
-      objects[path(rel, no_prefix: no_prefix)]
+      bucket.object(path(rel, no_prefix: no_prefix))
     end

     def url(rel, no_prefix: false)
-      "s3://#{@bucket}/#{path(rel, no_prefix: no_prefix)}"
+      "s3://#{@bucket_name}/#{path(rel, no_prefix: no_prefix)}"
     end

     def path(rel, no_prefix: false)
-      path = (no_prefix || !prefix) ? rel.to_s : "#{@prefix}/#{rel}"
+      path = (no_prefix || !@prefix) ? rel.to_s : "#{@prefix}/#{rel}"
       path.sub(%r<\A/>, '').gsub(%r<//>, '/')
     end
+
+    def traverse(rel, no_prefix: false)
+      bucket.objects(prefix: path(rel, no_prefix: no_prefix))
+    end
   end

   class S3Task < DataSourceTask
@@ -143,7 +152,7 @@ module Bricolage
       raise JobFailure, "no such file: #{@src}" if source_files.empty?
       each_src_dest do |src, dest|
         ds.logger.info command_line(src, dest)
-        ds.object(dest).write(file: src)
+        ds.object(dest).upload_file(src)
       end
       nil
     end
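
Note: S3DataSource now targets aws-sdk v2 (Aws::S3::Client / Aws::S3::Resource) instead of the v1 AWS::S3 API. A hedged sketch of the v2 calls the new bucket/object/traverse/upload helpers rely on; the bucket name and keys below are placeholders and valid AWS credentials are assumed:

    require 'aws-sdk'  # aws-sdk ~> 2

    client = Aws::S3::Client.new(region: 'ap-northeast-1')
    bucket = Aws::S3::Resource.new(client: client).bucket('example-bucket')

    bucket.object('dev/queue/ctl/load.json').put(body: '{}')      # v1: obj.write(data)
    bucket.object('dev/queue/local.txt').upload_file('local.txt') # v1: obj.write(file: path)
    bucket.objects(prefix: 'dev/queue/').each {|o| puts o.key }   # v1: objects.with_prefix(...)
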
@@ -1,4 +1,4 @@
 module Bricolage
   APPLICATION_NAME = 'Bricolage'
-  VERSION = '5.12.5'
+  VERSION = '5.13.0'
 end
@@ -1,2 +1,3 @@
 source 'https://rubygems.org/'
 gem 'bricolage', path: '../..'
+gem 'pry'
@@ -1,8 +1,8 @@
 PATH
   remote: ../..
   specs:
-    bricolage (5.12.5)
-      aws-sdk (< 2)
+    bricolage (5.13.0)
+      aws-sdk (~> 2)
       mysql2
       pg
       td
@@ -10,35 +10,43 @@ PATH
 GEM
   remote: https://rubygems.org/
   specs:
-    aws-sdk (1.66.0)
-      aws-sdk-v1 (= 1.66.0)
-    aws-sdk-v1 (1.66.0)
-      json (~> 1.4)
-      nokogiri (>= 1.4.4)
+    aws-sdk (2.2.6)
+      aws-sdk-resources (= 2.2.6)
+    aws-sdk-core (2.2.6)
+      jmespath (~> 1.0)
+    aws-sdk-resources (2.2.6)
+      aws-sdk-core (= 2.2.6)
+    coderay (1.1.0)
     fluent-logger (0.4.10)
       msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
       yajl-ruby (~> 1.0)
     hirb (0.7.3)
     httpclient (2.5.3.3)
+    jmespath (1.1.3)
     json (1.8.3)
-    mini_portile (0.6.2)
+    method_source (0.8.2)
     msgpack (0.5.11)
-    mysql2 (0.4.1)
-    nokogiri (1.6.6.2)
-      mini_portile (~> 0.6.0)
+    mysql2 (0.4.2)
     parallel (0.6.5)
-    pg (0.18.3)
+    pg (0.18.4)
+    pry (0.10.3)
+      coderay (~> 1.1.0)
+      method_source (~> 0.8.1)
+      slop (~> 3.4)
+    ruby-progressbar (1.7.5)
     rubyzip (1.1.7)
-    td (0.12.0)
+    slop (3.6.0)
+    td (0.13.0)
       hirb (>= 0.4.5)
       msgpack (>= 0.4.4, < 0.5.12, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
       parallel (~> 0.6.1)
+      ruby-progressbar (~> 1.7.5)
       rubyzip (~> 1.1.7)
-      td-client (~> 0.8.75)
+      td-client (~> 0.8.76)
       td-logger (~> 0.3.21)
       yajl-ruby (~> 1.1)
       zip-zip (~> 0.3)
-    td-client (0.8.75)
+    td-client (0.8.76)
       httpclient (>= 2.5.2, < 2.6.0)
       json (>= 1.7.6)
       msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
@@ -55,6 +63,7 @@ PLATFORMS

 DEPENDENCIES
   bricolage!
+  pry

 BUNDLED WITH
   1.10.6
@@ -45,6 +45,7 @@ td_search_log:
 s3:
   type: s3
   endpoint: "s3-ap-northeast-1.amazonaws.com"
+  region: "ap-northeast-1"
   bucket: tabemiru-data.ap-northeast-1
   prefix: "/dev"
   s3cfg: <%= user_home_relative_path '.s3cfg' %>
@@ -0,0 +1 @@
+test
@@ -8,5 +8,5 @@ in \
 do
   aws s3 cp \
     $(dirname $0)/data/$name \
-    $S3_TABEMIRU/dev/queue/$name
+    $S3_DEV/dev/queue/$name
 done
@@ -7,6 +7,6 @@ in \
   20141002-1355_02.txt
 do
   aws s3 mv \
-    s3://tabemiru-data.ap-northeast-1/tmp/save/year=2014/month=10/day=02/hour=13/$name \
-    s3://tabemiru-data.ap-northeast-1/tmp/queue/$name
+    $S3_DEV/dev/save/year=2014/month=10/day=02/hour=13/$name \
+    $S3_DEV/dev/queue/$name
 done
@@ -0,0 +1,3 @@
+class: s3-put
+dest-file: test-dest.txt
+src-file: $bricolage_home/data/test.txt
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: bricolage
 version: !ruby/object:Gem::Version
-  version: 5.12.5
+  version: 5.13.0
 platform: ruby
 authors:
 - Minero Aoki
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-11-27 00:00:00.000000000 Z
+date: 2015-12-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: pg
@@ -28,14 +28,14 @@ dependencies:
   name: aws-sdk
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "<"
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '2'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "<"
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '2'
 - !ruby/object:Gem::Dependency
@@ -169,6 +169,7 @@ files:
 - test/home/data/20141002-1355_00.txt
 - test/home/data/20141002-1355_01.txt
 - test/home/data/20141002-1355_02.txt
+- test/home/data/test.txt
 - test/home/jobnet-test.rb
 - test/home/put.sh
 - test/home/revert.sh
@@ -187,6 +188,7 @@ files:
 - test/home/subsys/migrate.job
 - test/home/subsys/net1.jobnet
 - test/home/subsys/net2.jobnet
+- test/home/subsys/put.job
 - test/home/subsys/raw-vacuum.jobnet
 - test/home/subsys/raw-vacuum.sql.job
 - test/home/subsys/rebuild.sql.job