bricolage 5.12.5 → 5.13.0
- checksums.yaml +4 -4
- data/jobclass/streaming_load.rb +36 -32
- data/lib/bricolage/psqldatasource.rb +7 -7
- data/lib/bricolage/s3datasource.rb +25 -16
- data/lib/bricolage/version.rb +1 -1
- data/test/home/Gemfile +1 -0
- data/test/home/Gemfile.lock +24 -15
- data/test/home/config/development/database.yml +1 -0
- data/test/home/data/test.txt +1 -0
- data/test/home/put.sh +1 -1
- data/test/home/revert.sh +2 -2
- data/test/home/subsys/put.job +3 -0
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8e56d9299cba57f0e086ad58140a5694aaa4d114
+  data.tar.gz: 63d3933d15359f340f9e7a7ffa0c50bfc39404b0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2a7cf7446da8ac747cba7cb41f56bb593a453bec20a57b6d785daca08314f27161a5a1db7ed9231b432bdedd417a8a9c06f9b6e49de5249371b2c840684d9d8f
+  data.tar.gz: 5d86674545cb5b8953e53c465bd5a5c12ab077a0e676c240967d0bcfe668418125c2f2c465622d2498bb6e7ce87109a406094b8a3d093e32fb1e5dfb3ed692cf
data/jobclass/streaming_load.rb
CHANGED
@@ -3,6 +3,7 @@ require 'bricolage/psqldatasource'
 require 'bricolage/exception'
 require 'json'
 require 'socket'
+require 'forwardable'
 
 class StreamingLoadJobClass < RubyJobClass
   job_class_id 'streaming_load'
@@ -53,13 +54,15 @@ class StreamingLoadJobClass < RubyJobClass
 
   def make_loader(params)
     ds = params['redshift-ds']
+    load_opts = params['load-options']
+    load_opts.provide_defaults(params['s3-ds'])
     RedshiftStreamingLoader.new(
       data_source: ds,
       queue: make_s3_queue(params),
       table: string(params['dest-table']),
       work_table: string(params['work-table']),
       log_table: string(params['log-table']),
-      load_options:
+      load_options: load_opts,
       sql: params['sql-file'],
       logger: ds.logger,
       noop: params['noop'],
@@ -306,6 +309,8 @@ class StreamingLoadJobClass < RubyJobClass
   end
 
   class S3Queue
+    extend Forwardable
+
     def initialize(data_source:, queue_path:, persistent_path:, file_name:, logger:)
       @ds = data_source
       @queue_path = queue_path
@@ -318,13 +323,15 @@ class StreamingLoadJobClass < RubyJobClass
       @ds.credential_string
     end
 
+    def_delegator '@ds', :encryption
+
     attr_reader :queue_path
 
     def queue_url
       @ds.url(@queue_path)
     end
 
-    def
+    def object_url_direct(key)
       @ds.url(key, no_prefix: true)
     end
 
@@ -334,7 +341,7 @@ class StreamingLoadJobClass < RubyJobClass
 
     def put_control_file(name, data, noop: false)
       @logger.info "s3 put: #{control_file_url(name)}"
-      @ds.object(control_file_path(name)).
+      @ds.object(control_file_path(name)).put(body: data) unless noop
       control_file_url(name)
     end
 
@@ -344,39 +351,27 @@ class StreamingLoadJobClass < RubyJobClass
     end
 
     def control_file_path(name)
-      "#{queue_path}/#{name}"
-    end
-
-    def consume_each(noop: false, &block)
-      each do |obj|
-        yield obj and obj.save(noop: noop)
-      end
+      "#{queue_path}/ctl/#{name}"
     end
 
     def each(&block)
       queued_objects.each(&block)
     end
 
-    def queue_directory
-      @ds.objects_with_prefix(queue_path)
-    end
-
-    def queued_file_nodes
-      queue_directory.as_tree.children.select {|node|
-        node.leaf? and
-        node.key[-1, 1] != '/' and
-        target_file_name?(File.basename(node.key))
-      }
-    end
-
     def queued_objects
-
+      @ds.traverse(queue_path)
+        .select {|obj| target_file_name?(File.basename(obj.key)) }
+        .map {|obj| LoadableObject.new(self, obj, @logger) }
     end
 
     def target_file_name?(name)
       file_name_pattern =~ name
     end
 
+    def persistent_object(name)
+      @ds.object(persistent_path(name), no_prefix: true)
+    end
+
     def persistent_path(name)
       @ds.path("#{format_path(@persistent_path, name)}/#{name}")
     end
@@ -425,9 +420,9 @@ class StreamingLoadJobClass < RubyJobClass
   end
 
   class LoadableObject
-    def initialize(s3queue,
+    def initialize(s3queue, object, logger)
       @s3queue = s3queue
-      @
+      @object = object
       @logger = logger
     end
 
@@ -436,7 +431,7 @@ class StreamingLoadJobClass < RubyJobClass
     end
 
     def path
-      @
+      @object.key
    end
 
     def basename
@@ -444,20 +439,29 @@ class StreamingLoadJobClass < RubyJobClass
     end
 
     def url
-      @s3queue.
+      @s3queue.object_url_direct(path)
     end
 
-    def
-      @logger.info "s3 move: #{path} -> #{
+    def dequeue(noop = false)
+      @logger.info "s3 move: #{path} -> #{persistent_path}"
       return if noop
-      @
+      @object.move_to persistent_object, dequeue_options
       @logger.info "file saved"
     end
 
-
+    def persistent_object
+      @s3queue.persistent_object(basename)
+    end
 
-    def
+    def persistent_path
       @s3queue.persistent_path(basename)
     end
+
+    def dequeue_options
+      opts = {
+        server_side_encryption: @s3queue.encryption
+      }
+      opts.reject {|k,v| v.nil? }
+    end
   end
 end
data/lib/bricolage/psqldatasource.rb
CHANGED
@@ -322,7 +322,7 @@ module Bricolage
       unless src_ds.redshift_loader_source?
         raise ParameterError, "input data source does not support redshift as bulk loading source: #{src_ds.name}"
       end
-
+      opts.provide_defaults(src_ds)
       buf = StringIO.new
       buf.puts "copy #{dest_table}"
       buf.puts "from '#{src_ds.url(src_path)}'"
@@ -335,12 +335,6 @@ module Bricolage
       buf.string
     end
 
-    def provide_default_load_options(opts, src_ds)
-      if src_ds.encrypted? and not opts.key?('encrypted')
-        opts['encrypted'] = true
-      end
-    end
-
     def format_option(fmt, src_ds, jsonpath)
       case fmt
       when 'tsv'
@@ -477,6 +471,12 @@ module Bricolage
       buf.string
     end
 
+    def provide_defaults(src_ds)
+      if src_ds.encrypted? and not key?('encrypted')
+        self['encrypted'] = true
+      end
+    end
+
    class Option
       def initialize(name, value)
         @name = name
data/lib/bricolage/s3datasource.rb
CHANGED
@@ -8,22 +8,28 @@ module Bricolage
   class S3DataSource < DataSource
     declare_type 's3'
 
-    def initialize(
+    def initialize(
+        endpoint: 's3-ap-northeast-1.amazonaws.com',
+        region: 'ap-northeast-1',
         bucket: nil, prefix: nil,
         access_key_id: nil, secret_access_key: nil, master_symmetric_key: nil,
+        encryption: nil,
         s3cfg: nil)
-      @endpoint = endpoint
-      @
+      @endpoint = (/\Ahttps?:/ =~ endpoint) ? endpoint : "https://#{endpoint}"
+      @region = region
+      @bucket_name = bucket
       @prefix = (prefix && prefix.empty?) ? nil : prefix
       @access_key_id = access_key_id
       @secret_access_key = secret_access_key
       @master_symmetric_key = master_symmetric_key
+      @encryption = encryption
       @s3cfg = s3cfg
       @configurations = @s3cfg ? load_configurations(@s3cfg) : nil
     end
 
     attr_reader :endpoint
-    attr_reader :
+    attr_reader :region
+    attr_reader :bucket_name
     attr_reader :prefix
 
     def new_task
@@ -64,8 +70,10 @@ module Bricolage
       h
     end
 
+    attr_reader :encryption
+
     def encrypted?
-
+      !!(@master_symmetric_key or @encryption)
     end
 
     #
@@ -73,29 +81,30 @@ module Bricolage
     #
 
     def client
-      @client ||=
-    end
-
-    def objects
-      client.buckets[bucket].objects
+      @client ||= Aws::S3::Client.new(region: @region, endpoint: @endpoint, access_key_id: access_key, secret_access_key: secret_key)
     end
 
-    def
-
+    def bucket
+      @resource ||= Aws::S3::Resource.new(client: client)
+      @bucket ||= @resource.bucket(@bucket_name)
     end
 
     def object(rel, no_prefix: false)
-
+      bucket.object(path(rel, no_prefix: no_prefix))
     end
 
     def url(rel, no_prefix: false)
-      "s3://#{@
+      "s3://#{@bucket_name}/#{path(rel, no_prefix: no_prefix)}"
     end
 
     def path(rel, no_prefix: false)
-      path = (no_prefix ||
+      path = (no_prefix || !@prefix) ? rel.to_s : "#{@prefix}/#{rel}"
       path.sub(%r<\A/>, '').gsub(%r<//>, '/')
     end
+
+    def traverse(rel, no_prefix: false)
+      bucket.objects(prefix: path(rel, no_prefix: no_prefix))
+    end
   end
 
   class S3Task < DataSourceTask
@@ -143,7 +152,7 @@ module Bricolage
       raise JobFailure, "no such file: #{@src}" if source_files.empty?
       each_src_dest do |src, dest|
         ds.logger.info command_line(src, dest)
-        ds.object(dest).
+        ds.object(dest).upload_file(src)
       end
       nil
     end
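
The S3DataSource rewrite above moves the class onto the aws-sdk v2 resource API: a low-level Aws::S3::Client wrapped in an Aws::S3::Resource, Bucket#object and Bucket#objects for key lookup and prefix traversal (the new traverse method), and Object#upload_file for the put task. The sketch below exercises the same aws-sdk v2 calls directly; the bucket name, region, endpoint, and keys are placeholders, and credentials come from the SDK's default provider chain.

require 'aws-sdk'  # aws-sdk ~> 2, as pinned in the Gemfile.lock below

# Low-level client, then the resource wrapper, as in S3DataSource#client / #bucket.
client = Aws::S3::Client.new(
  region: 'ap-northeast-1',                            # placeholder region
  endpoint: 'https://s3-ap-northeast-1.amazonaws.com'  # placeholder endpoint
)
bucket = Aws::S3::Resource.new(client: client).bucket('example-bucket')  # placeholder bucket

# Equivalent of the new S3DataSource#traverse: list object summaries under a prefix.
bucket.objects(prefix: 'dev/queue/').each do |summary|
  puts summary.key
end

# Equivalent of the put task's ds.object(dest).upload_file(src).
bucket.object('dev/queue/example.txt').upload_file('/tmp/example.txt')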
data/lib/bricolage/version.rb
CHANGED
data/test/home/Gemfile
CHANGED
data/test/home/Gemfile.lock
CHANGED
@@ -1,8 +1,8 @@
 PATH
   remote: ../..
   specs:
-    bricolage (5.
-      aws-sdk (
+    bricolage (5.13.0)
+      aws-sdk (~> 2)
       mysql2
       pg
       td
@@ -10,35 +10,43 @@ PATH
 GEM
   remote: https://rubygems.org/
   specs:
-    aws-sdk (
-      aws-sdk-
-    aws-sdk-
-
-
+    aws-sdk (2.2.6)
+      aws-sdk-resources (= 2.2.6)
+    aws-sdk-core (2.2.6)
+      jmespath (~> 1.0)
+    aws-sdk-resources (2.2.6)
+      aws-sdk-core (= 2.2.6)
+    coderay (1.1.0)
     fluent-logger (0.4.10)
       msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
       yajl-ruby (~> 1.0)
     hirb (0.7.3)
     httpclient (2.5.3.3)
+    jmespath (1.1.3)
     json (1.8.3)
-
+    method_source (0.8.2)
     msgpack (0.5.11)
-    mysql2 (0.4.
-    nokogiri (1.6.6.2)
-      mini_portile (~> 0.6.0)
+    mysql2 (0.4.2)
     parallel (0.6.5)
-    pg (0.18.
+    pg (0.18.4)
+    pry (0.10.3)
+      coderay (~> 1.1.0)
+      method_source (~> 0.8.1)
+      slop (~> 3.4)
+    ruby-progressbar (1.7.5)
     rubyzip (1.1.7)
-
+    slop (3.6.0)
+    td (0.13.0)
       hirb (>= 0.4.5)
       msgpack (>= 0.4.4, < 0.5.12, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
       parallel (~> 0.6.1)
+      ruby-progressbar (~> 1.7.5)
       rubyzip (~> 1.1.7)
-      td-client (~> 0.8.
+      td-client (~> 0.8.76)
       td-logger (~> 0.3.21)
       yajl-ruby (~> 1.1)
       zip-zip (~> 0.3)
-    td-client (0.8.
+    td-client (0.8.76)
       httpclient (>= 2.5.2, < 2.6.0)
       json (>= 1.7.6)
       msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
@@ -55,6 +63,7 @@ PLATFORMS
 
 DEPENDENCIES
   bricolage!
+  pry
 
 BUNDLED WITH
    1.10.6
data/test/home/data/test.txt
ADDED
@@ -0,0 +1 @@
+test
data/test/home/put.sh
CHANGED
data/test/home/revert.sh
CHANGED
@@ -7,6 +7,6 @@ in \
     20141002-1355_02.txt
 do
   aws s3 mv \
-
-
+    $S3_DEV/dev/save/year=2014/month=10/day=02/hour=13/$name \
+    $S3_DEV/dev/queue/$name
 done
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: bricolage
 version: !ruby/object:Gem::Version
-  version: 5.
+  version: 5.13.0
 platform: ruby
 authors:
 - Minero Aoki
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-
+date: 2015-12-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: pg
@@ -28,14 +28,14 @@ dependencies:
   name: aws-sdk
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '2'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '2'
 - !ruby/object:Gem::Dependency
@@ -169,6 +169,7 @@ files:
 - test/home/data/20141002-1355_00.txt
 - test/home/data/20141002-1355_01.txt
 - test/home/data/20141002-1355_02.txt
+- test/home/data/test.txt
 - test/home/jobnet-test.rb
 - test/home/put.sh
 - test/home/revert.sh
@@ -187,6 +188,7 @@ files:
 - test/home/subsys/migrate.job
 - test/home/subsys/net1.jobnet
 - test/home/subsys/net2.jobnet
+- test/home/subsys/put.job
 - test/home/subsys/raw-vacuum.jobnet
 - test/home/subsys/raw-vacuum.sql.job
 - test/home/subsys/rebuild.sql.job