bricolage 5.9.7 → 5.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/jobclass/streaming_load.rb +12 -1
- data/lib/bricolage/datasource.rb +1 -1
- data/lib/bricolage/parameters.rb +5 -0
- data/lib/bricolage/rubyjobclass.rb +40 -5
- data/lib/bricolage/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 950cab4515b421cdfd9a29fba1fa7812d365647a
|
4
|
+
data.tar.gz: 93badbfd8528d66c97f2cdb5bda2217172386bd8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5bc1b110a47f6cf5f81a30595fe9b200b77a6b7918bf4b2f7a8064597b3af36e50cc73bf91d32e4def188c43dd2829a81a57184a87e3c82bcad054cdbf3da7a4
|
7
|
+
data.tar.gz: e9cba607a0436f613093ae654b347d69c04024a5ac97860fc113d61e6a34e3c7f9f55afa856c584442a07ed1a02781dc2634d1d9707df725ea9d0aa30c89d666
|
data/jobclass/streaming_load.rb
CHANGED
@@ -20,6 +20,7 @@ class StreamingLoadJobClass < RubyJobClass
|
|
20
20
|
params.add Bricolage::StringParam.new('queue-path', 'S3_PATH', 'S3 path for data file queue.')
|
21
21
|
params.add Bricolage::StringParam.new('persistent-path', 'S3_PATH', 'S3 path for persistent data file store.')
|
22
22
|
params.add Bricolage::StringParam.new('file-name', 'PATTERN', 'name pattern of target data file.')
|
23
|
+
params.add Bricolage::SQLFileParam.new('sql-file', 'PATH', 'SQL to insert rows from the work table to the target table.', optional: true)
|
23
24
|
params.add Bricolage::OptionalBoolParam.new('noop', 'Does not change any data.')
|
24
25
|
params.add Bricolage::OptionalBoolParam.new('load-only', 'Just issues COPY statement to work table and quit. No INSERT, no dequeue, no load log.')
|
25
26
|
params.add Bricolage::OptionalBoolParam.new('dequeue-only', 'Dequeues already loaded files.')
|
@@ -46,6 +47,10 @@ class StreamingLoadJobClass < RubyJobClass
|
|
46
47
|
nil
|
47
48
|
end
|
48
49
|
|
50
|
+
def bind(ctx, vars)
|
51
|
+
@loader.sql.bind(ctx, vars) if @loader.sql
|
52
|
+
end
|
53
|
+
|
49
54
|
def make_loader(params)
|
50
55
|
ds = params['redshift-ds']
|
51
56
|
RedshiftStreamingLoader.new(
|
@@ -55,6 +60,7 @@ class StreamingLoadJobClass < RubyJobClass
|
|
55
60
|
work_table: string(params['work-table']),
|
56
61
|
log_table: string(params['log-table']),
|
57
62
|
load_options: params['load-options'],
|
63
|
+
sql: params['sql-file'],
|
58
64
|
logger: ds.logger,
|
59
65
|
noop: params['noop'],
|
60
66
|
load_only: params['load-only']
|
@@ -79,6 +85,7 @@ class StreamingLoadJobClass < RubyJobClass
|
|
79
85
|
class RedshiftStreamingLoader
|
80
86
|
def initialize(data_source:, queue:,
|
81
87
|
table:, work_table: nil, log_table: nil, load_options: nil,
|
88
|
+
sql: nil,
|
82
89
|
logger:, noop: false, load_only: false)
|
83
90
|
@ds = data_source
|
84
91
|
@src = queue
|
@@ -86,6 +93,7 @@ class StreamingLoadJobClass < RubyJobClass
|
|
86
93
|
@work_table = work_table
|
87
94
|
@log_table = log_table
|
88
95
|
@load_options = load_options
|
96
|
+
@sql = sql
|
89
97
|
@logger = logger
|
90
98
|
@noop = noop
|
91
99
|
@load_only = load_only
|
@@ -95,6 +103,8 @@ class StreamingLoadJobClass < RubyJobClass
|
|
95
103
|
@job_process_id = "#{@start_time.strftime('%Y%m%d-%H%M%S')}.#{Socket.gethostname}.#{Process.pid}"
|
96
104
|
end
|
97
105
|
|
106
|
+
attr_reader :sql
|
107
|
+
|
98
108
|
def load
|
99
109
|
load_in_parallel
|
100
110
|
end
|
@@ -170,7 +180,8 @@ class StreamingLoadJobClass < RubyJobClass
|
|
170
180
|
|
171
181
|
def commit_work_table(conn)
|
172
182
|
return unless @work_table
|
173
|
-
|
183
|
+
insert_stmt = @sql ? @sql.source : "insert into #{@table} select * from #{@work_table};"
|
184
|
+
execute_update conn, insert_stmt
|
174
185
|
# keep work table records for tracing
|
175
186
|
end
|
176
187
|
|
data/lib/bricolage/datasource.rb
CHANGED
data/lib/bricolage/parameters.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
require 'bricolage/jobclass'
|
2
|
+
require 'bricolage/datasource'
|
3
|
+
require 'forwardable'
|
2
4
|
|
3
5
|
module Bricolage
|
6
|
+
|
4
7
|
class RubyJobClass
|
5
8
|
def RubyJobClass.job_class_id(id)
|
6
9
|
wrapper = self
|
@@ -24,19 +27,51 @@ module Bricolage
|
|
24
27
|
|
25
28
|
def RubyJobClass.script(params, script)
|
26
29
|
ruby_job = new(params)
|
27
|
-
script.task(params.
|
28
|
-
task.
|
29
|
-
ruby_job.run
|
30
|
-
nil # job result
|
31
|
-
}
|
30
|
+
script.task(params.ruby_ds) {|task|
|
31
|
+
task.bind_ruby_job ruby_job
|
32
32
|
}
|
33
33
|
end
|
34
34
|
|
35
35
|
def initialize(params, *args)
|
36
36
|
end
|
37
37
|
|
38
|
+
def bind(ctx, vars)
|
39
|
+
end
|
40
|
+
|
41
|
+
def source
|
42
|
+
'ruby job'
|
43
|
+
end
|
44
|
+
|
38
45
|
def run
|
39
46
|
raise "bricolage: error: #{self.class}\#run is not overridden"
|
40
47
|
end
|
41
48
|
end
|
49
|
+
|
50
|
+
class RubyDataSource < DataSource
|
51
|
+
declare_type 'ruby'
|
52
|
+
|
53
|
+
# FIXME: keyword argument placeholder is required
|
54
|
+
def initialize(**)
|
55
|
+
end
|
56
|
+
|
57
|
+
def new_task
|
58
|
+
RubyTask.new(self)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
class RubyTask < DataSourceTask
|
63
|
+
def bind_ruby_job(ruby_job)
|
64
|
+
add RubyAction.new(ruby_job)
|
65
|
+
end
|
66
|
+
|
67
|
+
class RubyAction < Action
|
68
|
+
def initialize(ruby_job)
|
69
|
+
@ruby_job = ruby_job
|
70
|
+
end
|
71
|
+
|
72
|
+
extend Forwardable
|
73
|
+
def_delegators '@ruby_job', :source, :bind, :run
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
42
77
|
end
|
data/lib/bricolage/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bricolage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.9.7
|
4
|
+
version: 5.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Minero Aoki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pg
|