bricolage 5.12.5 → 5.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/jobclass/streaming_load.rb +36 -32
- data/lib/bricolage/psqldatasource.rb +7 -7
- data/lib/bricolage/s3datasource.rb +25 -16
- data/lib/bricolage/version.rb +1 -1
- data/test/home/Gemfile +1 -0
- data/test/home/Gemfile.lock +24 -15
- data/test/home/config/development/database.yml +1 -0
- data/test/home/data/test.txt +1 -0
- data/test/home/put.sh +1 -1
- data/test/home/revert.sh +2 -2
- data/test/home/subsys/put.job +3 -0
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8e56d9299cba57f0e086ad58140a5694aaa4d114
|
4
|
+
data.tar.gz: 63d3933d15359f340f9e7a7ffa0c50bfc39404b0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2a7cf7446da8ac747cba7cb41f56bb593a453bec20a57b6d785daca08314f27161a5a1db7ed9231b432bdedd417a8a9c06f9b6e49de5249371b2c840684d9d8f
|
7
|
+
data.tar.gz: 5d86674545cb5b8953e53c465bd5a5c12ab077a0e676c240967d0bcfe668418125c2f2c465622d2498bb6e7ce87109a406094b8a3d093e32fb1e5dfb3ed692cf
|
data/jobclass/streaming_load.rb
CHANGED
@@ -3,6 +3,7 @@ require 'bricolage/psqldatasource'
|
|
3
3
|
require 'bricolage/exception'
|
4
4
|
require 'json'
|
5
5
|
require 'socket'
|
6
|
+
require 'forwardable'
|
6
7
|
|
7
8
|
class StreamingLoadJobClass < RubyJobClass
|
8
9
|
job_class_id 'streaming_load'
|
@@ -53,13 +54,15 @@ class StreamingLoadJobClass < RubyJobClass
|
|
53
54
|
|
54
55
|
def make_loader(params)
|
55
56
|
ds = params['redshift-ds']
|
57
|
+
load_opts = params['load-options']
|
58
|
+
load_opts.provide_defaults(params['s3-ds'])
|
56
59
|
RedshiftStreamingLoader.new(
|
57
60
|
data_source: ds,
|
58
61
|
queue: make_s3_queue(params),
|
59
62
|
table: string(params['dest-table']),
|
60
63
|
work_table: string(params['work-table']),
|
61
64
|
log_table: string(params['log-table']),
|
62
|
-
load_options:
|
65
|
+
load_options: load_opts,
|
63
66
|
sql: params['sql-file'],
|
64
67
|
logger: ds.logger,
|
65
68
|
noop: params['noop'],
|
@@ -306,6 +309,8 @@ class StreamingLoadJobClass < RubyJobClass
|
|
306
309
|
end
|
307
310
|
|
308
311
|
class S3Queue
|
312
|
+
extend Forwardable
|
313
|
+
|
309
314
|
def initialize(data_source:, queue_path:, persistent_path:, file_name:, logger:)
|
310
315
|
@ds = data_source
|
311
316
|
@queue_path = queue_path
|
@@ -318,13 +323,15 @@ class StreamingLoadJobClass < RubyJobClass
|
|
318
323
|
@ds.credential_string
|
319
324
|
end
|
320
325
|
|
326
|
+
def_delegator '@ds', :encryption
|
327
|
+
|
321
328
|
attr_reader :queue_path
|
322
329
|
|
323
330
|
def queue_url
|
324
331
|
@ds.url(@queue_path)
|
325
332
|
end
|
326
333
|
|
327
|
-
def
|
334
|
+
def object_url_direct(key)
|
328
335
|
@ds.url(key, no_prefix: true)
|
329
336
|
end
|
330
337
|
|
@@ -334,7 +341,7 @@ class StreamingLoadJobClass < RubyJobClass
|
|
334
341
|
|
335
342
|
def put_control_file(name, data, noop: false)
|
336
343
|
@logger.info "s3 put: #{control_file_url(name)}"
|
337
|
-
@ds.object(control_file_path(name)).
|
344
|
+
@ds.object(control_file_path(name)).put(body: data) unless noop
|
338
345
|
control_file_url(name)
|
339
346
|
end
|
340
347
|
|
@@ -344,39 +351,27 @@ class StreamingLoadJobClass < RubyJobClass
|
|
344
351
|
end
|
345
352
|
|
346
353
|
def control_file_path(name)
|
347
|
-
"#{queue_path}/#{name}"
|
348
|
-
end
|
349
|
-
|
350
|
-
def consume_each(noop: false, &block)
|
351
|
-
each do |obj|
|
352
|
-
yield obj and obj.save(noop: noop)
|
353
|
-
end
|
354
|
+
"#{queue_path}/ctl/#{name}"
|
354
355
|
end
|
355
356
|
|
356
357
|
def each(&block)
|
357
358
|
queued_objects.each(&block)
|
358
359
|
end
|
359
360
|
|
360
|
-
def queue_directory
|
361
|
-
@ds.objects_with_prefix(queue_path)
|
362
|
-
end
|
363
|
-
|
364
|
-
def queued_file_nodes
|
365
|
-
queue_directory.as_tree.children.select {|node|
|
366
|
-
node.leaf? and
|
367
|
-
node.key[-1, 1] != '/' and
|
368
|
-
target_file_name?(File.basename(node.key))
|
369
|
-
}
|
370
|
-
end
|
371
|
-
|
372
361
|
def queued_objects
|
373
|
-
|
362
|
+
@ds.traverse(queue_path)
|
363
|
+
.select {|obj| target_file_name?(File.basename(obj.key)) }
|
364
|
+
.map {|obj| LoadableObject.new(self, obj, @logger) }
|
374
365
|
end
|
375
366
|
|
376
367
|
def target_file_name?(name)
|
377
368
|
file_name_pattern =~ name
|
378
369
|
end
|
379
370
|
|
371
|
+
def persistent_object(name)
|
372
|
+
@ds.object(persistent_path(name), no_prefix: true)
|
373
|
+
end
|
374
|
+
|
380
375
|
def persistent_path(name)
|
381
376
|
@ds.path("#{format_path(@persistent_path, name)}/#{name}")
|
382
377
|
end
|
@@ -425,9 +420,9 @@ class StreamingLoadJobClass < RubyJobClass
|
|
425
420
|
end
|
426
421
|
|
427
422
|
class LoadableObject
|
428
|
-
def initialize(s3queue,
|
423
|
+
def initialize(s3queue, object, logger)
|
429
424
|
@s3queue = s3queue
|
430
|
-
@
|
425
|
+
@object = object
|
431
426
|
@logger = logger
|
432
427
|
end
|
433
428
|
|
@@ -436,7 +431,7 @@ class StreamingLoadJobClass < RubyJobClass
|
|
436
431
|
end
|
437
432
|
|
438
433
|
def path
|
439
|
-
@
|
434
|
+
@object.key
|
440
435
|
end
|
441
436
|
|
442
437
|
def basename
|
@@ -444,20 +439,29 @@ class StreamingLoadJobClass < RubyJobClass
|
|
444
439
|
end
|
445
440
|
|
446
441
|
def url
|
447
|
-
@s3queue.
|
442
|
+
@s3queue.object_url_direct(path)
|
448
443
|
end
|
449
444
|
|
450
|
-
def
|
451
|
-
@logger.info "s3 move: #{path} -> #{
|
445
|
+
def dequeue(noop = false)
|
446
|
+
@logger.info "s3 move: #{path} -> #{persistent_path}"
|
452
447
|
return if noop
|
453
|
-
@
|
448
|
+
@object.move_to persistent_object, dequeue_options
|
454
449
|
@logger.info "file saved"
|
455
450
|
end
|
456
451
|
|
457
|
-
|
452
|
+
def persistent_object
|
453
|
+
@s3queue.persistent_object(basename)
|
454
|
+
end
|
458
455
|
|
459
|
-
def
|
456
|
+
def persistent_path
|
460
457
|
@s3queue.persistent_path(basename)
|
461
458
|
end
|
459
|
+
|
460
|
+
def dequeue_options
|
461
|
+
opts = {
|
462
|
+
server_side_encryption: @s3queue.encryption
|
463
|
+
}
|
464
|
+
opts.reject {|k,v| v.nil? }
|
465
|
+
end
|
462
466
|
end
|
463
467
|
end
|
@@ -322,7 +322,7 @@ module Bricolage
|
|
322
322
|
unless src_ds.redshift_loader_source?
|
323
323
|
raise ParameterError, "input data source does not support redshift as bulk loading source: #{src_ds.name}"
|
324
324
|
end
|
325
|
-
|
325
|
+
opts.provide_defaults(src_ds)
|
326
326
|
buf = StringIO.new
|
327
327
|
buf.puts "copy #{dest_table}"
|
328
328
|
buf.puts "from '#{src_ds.url(src_path)}'"
|
@@ -335,12 +335,6 @@ module Bricolage
|
|
335
335
|
buf.string
|
336
336
|
end
|
337
337
|
|
338
|
-
def provide_default_load_options(opts, src_ds)
|
339
|
-
if src_ds.encrypted? and not opts.key?('encrypted')
|
340
|
-
opts['encrypted'] = true
|
341
|
-
end
|
342
|
-
end
|
343
|
-
|
344
338
|
def format_option(fmt, src_ds, jsonpath)
|
345
339
|
case fmt
|
346
340
|
when 'tsv'
|
@@ -477,6 +471,12 @@ module Bricolage
|
|
477
471
|
buf.string
|
478
472
|
end
|
479
473
|
|
474
|
+
def provide_defaults(src_ds)
|
475
|
+
if src_ds.encrypted? and not key?('encrypted')
|
476
|
+
self['encrypted'] = true
|
477
|
+
end
|
478
|
+
end
|
479
|
+
|
480
480
|
class Option
|
481
481
|
def initialize(name, value)
|
482
482
|
@name = name
|
@@ -8,22 +8,28 @@ module Bricolage
|
|
8
8
|
class S3DataSource < DataSource
|
9
9
|
declare_type 's3'
|
10
10
|
|
11
|
-
def initialize(
|
11
|
+
def initialize(
|
12
|
+
endpoint: 's3-ap-northeast-1.amazonaws.com',
|
13
|
+
region: 'ap-northeast-1',
|
12
14
|
bucket: nil, prefix: nil,
|
13
15
|
access_key_id: nil, secret_access_key: nil, master_symmetric_key: nil,
|
16
|
+
encryption: nil,
|
14
17
|
s3cfg: nil)
|
15
|
-
@endpoint = endpoint
|
16
|
-
@
|
18
|
+
@endpoint = (/\Ahttps?:/ =~ endpoint) ? endpoint : "https://#{endpoint}"
|
19
|
+
@region = region
|
20
|
+
@bucket_name = bucket
|
17
21
|
@prefix = (prefix && prefix.empty?) ? nil : prefix
|
18
22
|
@access_key_id = access_key_id
|
19
23
|
@secret_access_key = secret_access_key
|
20
24
|
@master_symmetric_key = master_symmetric_key
|
25
|
+
@encryption = encryption
|
21
26
|
@s3cfg = s3cfg
|
22
27
|
@configurations = @s3cfg ? load_configurations(@s3cfg) : nil
|
23
28
|
end
|
24
29
|
|
25
30
|
attr_reader :endpoint
|
26
|
-
attr_reader :
|
31
|
+
attr_reader :region
|
32
|
+
attr_reader :bucket_name
|
27
33
|
attr_reader :prefix
|
28
34
|
|
29
35
|
def new_task
|
@@ -64,8 +70,10 @@ module Bricolage
|
|
64
70
|
h
|
65
71
|
end
|
66
72
|
|
73
|
+
attr_reader :encryption
|
74
|
+
|
67
75
|
def encrypted?
|
68
|
-
|
76
|
+
!!(@master_symmetric_key or @encryption)
|
69
77
|
end
|
70
78
|
|
71
79
|
#
|
@@ -73,29 +81,30 @@ module Bricolage
|
|
73
81
|
#
|
74
82
|
|
75
83
|
def client
|
76
|
-
@client ||=
|
77
|
-
end
|
78
|
-
|
79
|
-
def objects
|
80
|
-
client.buckets[bucket].objects
|
84
|
+
@client ||= Aws::S3::Client.new(region: @region, endpoint: @endpoint, access_key_id: access_key, secret_access_key: secret_key)
|
81
85
|
end
|
82
86
|
|
83
|
-
def
|
84
|
-
|
87
|
+
def bucket
|
88
|
+
@resource ||= Aws::S3::Resource.new(client: client)
|
89
|
+
@bucket ||= @resource.bucket(@bucket_name)
|
85
90
|
end
|
86
91
|
|
87
92
|
def object(rel, no_prefix: false)
|
88
|
-
|
93
|
+
bucket.object(path(rel, no_prefix: no_prefix))
|
89
94
|
end
|
90
95
|
|
91
96
|
def url(rel, no_prefix: false)
|
92
|
-
"s3://#{@
|
97
|
+
"s3://#{@bucket_name}/#{path(rel, no_prefix: no_prefix)}"
|
93
98
|
end
|
94
99
|
|
95
100
|
def path(rel, no_prefix: false)
|
96
|
-
path = (no_prefix ||
|
101
|
+
path = (no_prefix || !@prefix) ? rel.to_s : "#{@prefix}/#{rel}"
|
97
102
|
path.sub(%r<\A/>, '').gsub(%r<//>, '/')
|
98
103
|
end
|
104
|
+
|
105
|
+
def traverse(rel, no_prefix: false)
|
106
|
+
bucket.objects(prefix: path(rel, no_prefix: no_prefix))
|
107
|
+
end
|
99
108
|
end
|
100
109
|
|
101
110
|
class S3Task < DataSourceTask
|
@@ -143,7 +152,7 @@ module Bricolage
|
|
143
152
|
raise JobFailure, "no such file: #{@src}" if source_files.empty?
|
144
153
|
each_src_dest do |src, dest|
|
145
154
|
ds.logger.info command_line(src, dest)
|
146
|
-
ds.object(dest).
|
155
|
+
ds.object(dest).upload_file(src)
|
147
156
|
end
|
148
157
|
nil
|
149
158
|
end
|
data/lib/bricolage/version.rb
CHANGED
data/test/home/Gemfile
CHANGED
data/test/home/Gemfile.lock
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: ../..
|
3
3
|
specs:
|
4
|
-
bricolage (5.
|
5
|
-
aws-sdk (
|
4
|
+
bricolage (5.13.0)
|
5
|
+
aws-sdk (~> 2)
|
6
6
|
mysql2
|
7
7
|
pg
|
8
8
|
td
|
@@ -10,35 +10,43 @@ PATH
|
|
10
10
|
GEM
|
11
11
|
remote: https://rubygems.org/
|
12
12
|
specs:
|
13
|
-
aws-sdk (
|
14
|
-
aws-sdk-
|
15
|
-
aws-sdk-
|
16
|
-
|
17
|
-
|
13
|
+
aws-sdk (2.2.6)
|
14
|
+
aws-sdk-resources (= 2.2.6)
|
15
|
+
aws-sdk-core (2.2.6)
|
16
|
+
jmespath (~> 1.0)
|
17
|
+
aws-sdk-resources (2.2.6)
|
18
|
+
aws-sdk-core (= 2.2.6)
|
19
|
+
coderay (1.1.0)
|
18
20
|
fluent-logger (0.4.10)
|
19
21
|
msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
|
20
22
|
yajl-ruby (~> 1.0)
|
21
23
|
hirb (0.7.3)
|
22
24
|
httpclient (2.5.3.3)
|
25
|
+
jmespath (1.1.3)
|
23
26
|
json (1.8.3)
|
24
|
-
|
27
|
+
method_source (0.8.2)
|
25
28
|
msgpack (0.5.11)
|
26
|
-
mysql2 (0.4.
|
27
|
-
nokogiri (1.6.6.2)
|
28
|
-
mini_portile (~> 0.6.0)
|
29
|
+
mysql2 (0.4.2)
|
29
30
|
parallel (0.6.5)
|
30
|
-
pg (0.18.
|
31
|
+
pg (0.18.4)
|
32
|
+
pry (0.10.3)
|
33
|
+
coderay (~> 1.1.0)
|
34
|
+
method_source (~> 0.8.1)
|
35
|
+
slop (~> 3.4)
|
36
|
+
ruby-progressbar (1.7.5)
|
31
37
|
rubyzip (1.1.7)
|
32
|
-
|
38
|
+
slop (3.6.0)
|
39
|
+
td (0.13.0)
|
33
40
|
hirb (>= 0.4.5)
|
34
41
|
msgpack (>= 0.4.4, < 0.5.12, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
|
35
42
|
parallel (~> 0.6.1)
|
43
|
+
ruby-progressbar (~> 1.7.5)
|
36
44
|
rubyzip (~> 1.1.7)
|
37
|
-
td-client (~> 0.8.
|
45
|
+
td-client (~> 0.8.76)
|
38
46
|
td-logger (~> 0.3.21)
|
39
47
|
yajl-ruby (~> 1.1)
|
40
48
|
zip-zip (~> 0.3)
|
41
|
-
td-client (0.8.
|
49
|
+
td-client (0.8.76)
|
42
50
|
httpclient (>= 2.5.2, < 2.6.0)
|
43
51
|
json (>= 1.7.6)
|
44
52
|
msgpack (>= 0.4.4, < 0.6.0, != 0.5.3, != 0.5.2, != 0.5.1, != 0.5.0)
|
@@ -55,6 +63,7 @@ PLATFORMS
|
|
55
63
|
|
56
64
|
DEPENDENCIES
|
57
65
|
bricolage!
|
66
|
+
pry
|
58
67
|
|
59
68
|
BUNDLED WITH
|
60
69
|
1.10.6
|
@@ -0,0 +1 @@
|
|
1
|
+
test
|
data/test/home/put.sh
CHANGED
data/test/home/revert.sh
CHANGED
@@ -7,6 +7,6 @@ in \
|
|
7
7
|
20141002-1355_02.txt
|
8
8
|
do
|
9
9
|
aws s3 mv \
|
10
|
-
|
11
|
-
|
10
|
+
$S3_DEV/dev/save/year=2014/month=10/day=02/hour=13/$name \
|
11
|
+
$S3_DEV/dev/queue/$name
|
12
12
|
done
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bricolage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.
|
4
|
+
version: 5.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Minero Aoki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pg
|
@@ -28,14 +28,14 @@ dependencies:
|
|
28
28
|
name: aws-sdk
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '2'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '2'
|
41
41
|
- !ruby/object:Gem::Dependency
|
@@ -169,6 +169,7 @@ files:
|
|
169
169
|
- test/home/data/20141002-1355_00.txt
|
170
170
|
- test/home/data/20141002-1355_01.txt
|
171
171
|
- test/home/data/20141002-1355_02.txt
|
172
|
+
- test/home/data/test.txt
|
172
173
|
- test/home/jobnet-test.rb
|
173
174
|
- test/home/put.sh
|
174
175
|
- test/home/revert.sh
|
@@ -187,6 +188,7 @@ files:
|
|
187
188
|
- test/home/subsys/migrate.job
|
188
189
|
- test/home/subsys/net1.jobnet
|
189
190
|
- test/home/subsys/net2.jobnet
|
191
|
+
- test/home/subsys/put.job
|
190
192
|
- test/home/subsys/raw-vacuum.jobnet
|
191
193
|
- test/home/subsys/raw-vacuum.sql.job
|
192
194
|
- test/home/subsys/rebuild.sql.job
|