embulk-output-vertica 0.7.2 → 0.7.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +1 -1
- data/embulk-output-vertica.gemspec +4 -4
- data/example/timeout.yml +41 -0
- data/lib/embulk/output/vertica.rb +5 -2
- data/lib/embulk/output/vertica/output_thread.rb +89 -14
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e89fddffd4579262fb1d91d92a3288c0e8a8ba2
|
4
|
+
data.tar.gz: e5708526eb702e7008e2758126251a4e306e75f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: efa9d97f5d999a983f4d55ffc5cb5413bf1b9da63f825276682e01c14fbdc9541da1277a108b2b639f9960f7e6af21f4b1c7b99894ca51ae1954814e777e5e27
|
7
|
+
data.tar.gz: c4c1b97985f3211d187192b8581f1602109cb2eb9b2d1f58dc05e30e232a7b823e00fd7b76831fea95883b80232e759d632d621892f3c69429612cf3b5e1fe12
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -44,7 +44,7 @@
|
|
44
44
|
### Modes
|
45
45
|
|
46
46
|
* **insert**:
|
47
|
-
* Behavior: This mode copies rows to
|
47
|
+
* Behavior: This mode copies rows to an intermediate table first. If all those tasks run correctly, it runs `INSERT INTO <target_table> SELECT * FROM <intermediate_table>`
|
48
48
|
* **replace**:
|
49
49
|
* Behavior: This mode copies rows to an intermediate table first. If all those tasks run correctly, it swaps the target table and the intermediate table, and drops the old target table
|
50
50
|
* **drop_insert**:
|
@@ -1,11 +1,11 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-vertica"
|
3
|
-
spec.version = "0.7.
|
4
|
-
spec.authors = ["
|
5
|
-
spec.email = ["
|
3
|
+
spec.version = "0.7.3"
|
4
|
+
spec.authors = ["Naotoshi Seo", "Eiji Sekiya"]
|
5
|
+
spec.email = ["sonots@gmail.com", "eiji.sekiya.0326@gmail.com"]
|
6
6
|
spec.summary = "Vertica output plugin for Embulk"
|
7
7
|
spec.description = "Dump records to vertica"
|
8
|
-
spec.homepage = "https://github.com/
|
8
|
+
spec.homepage = "https://github.com/sonots/embulk-output-vertica"
|
9
9
|
spec.license = "MIT"
|
10
10
|
|
11
11
|
spec.files = `git ls-files -z`.split("\x0")
|
data/example/timeout.yml
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# in:
|
2
|
+
# type: random
|
3
|
+
# rows: 100
|
4
|
+
# schema:
|
5
|
+
# id: primary_key
|
6
|
+
# name: string
|
7
|
+
# score: integer
|
8
|
+
in:
|
9
|
+
type: file
|
10
|
+
path_prefix: example/example.csv
|
11
|
+
parser:
|
12
|
+
type: csv
|
13
|
+
charset: UTF-8
|
14
|
+
newline: CRLF
|
15
|
+
null_string: 'NULL'
|
16
|
+
skip_header_lines: 1
|
17
|
+
comment_line_marker: '#'
|
18
|
+
columns:
|
19
|
+
- {name: timestamp_date, type: timestamp, format: "%Y-%m-%d", timezone: "+09:00"}
|
20
|
+
- {name: string_date, type: string}
|
21
|
+
- {name: foo, type: string}
|
22
|
+
- {name: bar, type: string}
|
23
|
+
- {name: id, type: long}
|
24
|
+
- {name: name, type: string}
|
25
|
+
- {name: score, type: double}
|
26
|
+
out:
|
27
|
+
type: vertica
|
28
|
+
host: 127.0.0.1
|
29
|
+
user: dbadmin
|
30
|
+
password: xxxxxxx
|
31
|
+
database: vdb
|
32
|
+
schema: sandbox
|
33
|
+
table: embulk_test
|
34
|
+
mode: drop_insert
|
35
|
+
copy_mode: DIRECT
|
36
|
+
abort_on_error: true
|
37
|
+
reject_on_materialized_type_error: true
|
38
|
+
default_timezone: 'Asia/Tokyo'
|
39
|
+
write_timeout: 660
|
40
|
+
dequeue_timeout: 780
|
41
|
+
finish_timeout: 180
|
@@ -14,7 +14,7 @@ module Embulk
|
|
14
14
|
@thread_pool ||= @thread_pool_proc.call
|
15
15
|
end
|
16
16
|
|
17
|
-
def self.transaction(config, schema,
|
17
|
+
def self.transaction(config, schema, task_count, &control)
|
18
18
|
task = {
|
19
19
|
'host' => config.param('host', :string, :default => 'localhost'),
|
20
20
|
'port' => config.param('port', :integer, :default => 5433),
|
@@ -33,7 +33,10 @@ module Embulk
|
|
33
33
|
'json_payload' => config.param('json_payload', :bool, :default => false),
|
34
34
|
'resource_pool' => config.param('resource_pool', :string, :default => nil),
|
35
35
|
'reject_on_materialized_type_error' => config.param('reject_on_materialized_type_error', :bool, :default => false),
|
36
|
-
'pool' => config.param('pool', :integer, :default =>
|
36
|
+
'pool' => config.param('pool', :integer, :default => task_count),
|
37
|
+
'write_timeout' => config.param('write_timeout', :integer, :default => nil), # like 11 * 60 sec
|
38
|
+
'dequeue_timeout' => config.param('dequeue_timeout', :integer, :default => nil), # like 13 * 60 sec
|
39
|
+
'finish_timeout' => config.param('finish_timeout', :integer, :default => nil), # like 3 * 60 sec
|
37
40
|
}
|
38
41
|
|
39
42
|
@thread_pool_proc = Proc.new do
|
@@ -3,6 +3,14 @@ require 'zlib'
|
|
3
3
|
module Embulk
|
4
4
|
module Output
|
5
5
|
class Vertica < OutputPlugin
|
6
|
+
class CommitError < ::StandardError; end
|
7
|
+
class TimeoutError < ::Timeout::Error; end
|
8
|
+
class DequeueTimeoutError < TimeoutError; end
|
9
|
+
class FinishTimeoutError < TimeoutError; end
|
10
|
+
class WriteTimeoutError < TimeoutError; end
|
11
|
+
|
12
|
+
$embulk_output_vertica_thread_dumped = false
|
13
|
+
|
6
14
|
class OutputThreadPool
|
7
15
|
def initialize(task, schema, size)
|
8
16
|
@task = task
|
@@ -30,7 +38,14 @@ module Embulk
|
|
30
38
|
end
|
31
39
|
|
32
40
|
def commit
|
33
|
-
|
41
|
+
Embulk.logger.debug "embulk-output-vertica: commit"
|
42
|
+
task_reports = @mutex.synchronize do
|
43
|
+
@size.times.map {|i| @output_threads[i].commit }
|
44
|
+
end
|
45
|
+
unless task_reports.all? {|task_report| task_report['success'] }
|
46
|
+
raise CommitError, "some of output_threads failed to commit"
|
47
|
+
end
|
48
|
+
task_reports
|
34
49
|
end
|
35
50
|
|
36
51
|
def to_json(record)
|
@@ -64,12 +79,26 @@ module Embulk
|
|
64
79
|
end
|
65
80
|
end
|
66
81
|
|
82
|
+
def thread_dump
|
83
|
+
unless $embulk_output_vertica_thread_dumped
|
84
|
+
$embulk_output_vertica_thread_dumped = true
|
85
|
+
Embulk.logger.debug "embulk-output-vertica: kill -3 #{$$} (Thread dump)"
|
86
|
+
begin
|
87
|
+
Process.kill :QUIT, $$
|
88
|
+
rescue SignalException
|
89
|
+
ensure
|
90
|
+
sleep 1
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
67
95
|
def enqueue(json_page)
|
68
96
|
if @thread_active and @thread.alive?
|
69
97
|
Embulk.logger.trace { "embulk-output-vertica: enqueue" }
|
70
98
|
@queue.push(json_page)
|
71
99
|
else
|
72
100
|
Embulk.logger.info { "embulk-output-vertica: thread is dead, but still trying to enqueue" }
|
101
|
+
thread_dump
|
73
102
|
raise RuntimeError, "embulk-output-vertica: thread is died, but still trying to enqueue"
|
74
103
|
end
|
75
104
|
end
|
@@ -93,7 +122,8 @@ module Embulk
|
|
93
122
|
i = 0
|
94
123
|
# split str so that a single write does not block (the max size of the pipe buffer is 64k bytes by default on Linux and Mac)
|
95
124
|
while substr = str[i, PIPE_BUF]
|
96
|
-
io.write
|
125
|
+
Embulk.logger.trace { "embulk-output-vertica: io.write with write_timeout:#{@task['write_timeout']}" }
|
126
|
+
Timeout.timeout(@task['write_timeout'], WriteTimeoutError) { io.write(substr) }
|
97
127
|
i += PIPE_BUF
|
98
128
|
end
|
99
129
|
end
|
@@ -121,19 +151,24 @@ module Embulk
|
|
121
151
|
# @return [Array] dequeued json_page
|
122
152
|
# @return [String] 'finish' when the finish marker is dequeued
|
123
153
|
def dequeue
|
124
|
-
json_page =
|
154
|
+
json_page = nil
|
155
|
+
Embulk.logger.trace { "embulk-output-vertica: @queue.pop with dequeue_timeout:#{@task['dequeue_timeout']}" }
|
156
|
+
Timeout.timeout(@task['dequeue_timeout'], DequeueTimeoutError) { json_page = @queue.pop }
|
125
157
|
Embulk.logger.trace { "embulk-output-vertica: dequeued" }
|
126
158
|
Embulk.logger.debug { "embulk-output-vertica: dequeued finish" } if json_page == 'finish'
|
127
159
|
json_page
|
128
160
|
end
|
129
161
|
|
130
162
|
def copy(jv, sql, &block)
|
131
|
-
Embulk.logger.debug "embulk-output-vertica:
|
163
|
+
Embulk.logger.debug "embulk-output-vertica: copy, waiting a first message"
|
164
|
+
|
132
165
|
num_output_rows = 0; rejected_row_nums = []; last_record = nil
|
133
166
|
|
134
167
|
json_page = dequeue
|
135
168
|
return [num_output_rows, rejected_row_nums, last_record] if json_page == 'finish'
|
136
169
|
|
170
|
+
Embulk.logger.debug "embulk-output-vertica: #{sql}"
|
171
|
+
|
137
172
|
num_output_rows, rejected_row_nums = jv.copy(sql) do |stdin, stream|
|
138
173
|
@write_proc.call(stdin, json_page) {|record| last_record = record }
|
139
174
|
|
@@ -159,7 +194,8 @@ module Embulk
|
|
159
194
|
|
160
195
|
def run
|
161
196
|
Embulk.logger.debug { "embulk-output-vertica: thread started" }
|
162
|
-
|
197
|
+
begin
|
198
|
+
jv = Vertica.connect(@task)
|
163
199
|
begin
|
164
200
|
num_output_rows, rejected_row_nums, last_record = copy(jv, copy_sql)
|
165
201
|
Embulk.logger.debug { "embulk-output-vertica: thread finished" }
|
@@ -170,20 +206,50 @@ module Embulk
|
|
170
206
|
Embulk.logger.warn "embulk-output-vertica: ROLLBACK! #{rejected_row_nums}"
|
171
207
|
end
|
172
208
|
Embulk.logger.info { "embulk-output-vertica: last_record: #{last_record}" }
|
173
|
-
jv
|
174
|
-
raise e
|
209
|
+
rollback(jv)
|
210
|
+
raise e
|
175
211
|
rescue => e
|
176
|
-
Embulk.logger.warn "embulk-output-vertica: ROLLBACK! #{e.class} #{e.message}"
|
177
|
-
jv
|
212
|
+
Embulk.logger.warn "embulk-output-vertica: ROLLBACK! #{e.class} #{e.message} #{e.backtrace.join("\n ")}"
|
213
|
+
rollback(jv)
|
178
214
|
raise e
|
179
215
|
end
|
216
|
+
ensure
|
217
|
+
close(jv)
|
180
218
|
end
|
181
|
-
rescue => e
|
219
|
+
rescue TimeoutError => e
|
220
|
+
Embulk.logger.error "embulk-output-vertica: UNKNOWN TIMEOUT!! #{e.class}"
|
221
|
+
@thread_active = false # not to be enqueued any more
|
222
|
+
dequeue_all
|
223
|
+
thread_dump
|
224
|
+
exit(1)
|
225
|
+
rescue Exception => e
|
226
|
+
Embulk.logger.error "embulk-output-vertica: UNKNOWN ERROR! #{e.class} #{e.message} #{e.backtrace.join("\n ")}"
|
182
227
|
@thread_active = false # not to be enqueued any more
|
228
|
+
dequeue_all
|
229
|
+
@outer_thread.raise e
|
230
|
+
end
|
231
|
+
|
232
|
+
def dequeue_all
|
233
|
+
Embulk.logger.debug "embulk-output-vertica: dequeue all"
|
183
234
|
while @queue.size > 0
|
184
235
|
@queue.pop # dequeue all because some might be still trying @queue.push and get blocked, need to release
|
185
236
|
end
|
186
|
-
|
237
|
+
end
|
238
|
+
|
239
|
+
def close(jv)
|
240
|
+
begin
|
241
|
+
jv.close
|
242
|
+
rescue java.sql.SQLException => e # The connection is closed
|
243
|
+
Embulk.logger.debug "embulk-output-vertica: #{e.class} #{e.message}"
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
def rollback(jv)
|
248
|
+
begin
|
249
|
+
jv.rollback
|
250
|
+
rescue java.sql.SQLException => e # The connection is closed
|
251
|
+
Embulk.logger.debug "embulk-output-vertica: #{e.class} #{e.message}"
|
252
|
+
end
|
187
253
|
end
|
188
254
|
|
189
255
|
def start
|
@@ -192,20 +258,29 @@ module Embulk
|
|
192
258
|
end
|
193
259
|
|
194
260
|
def commit
|
261
|
+
Embulk.logger.debug "embulk-output-vertica: output_thread commit"
|
195
262
|
@thread_active = false
|
263
|
+
success = true
|
196
264
|
if @thread.alive?
|
197
|
-
Embulk.logger.debug { "embulk-output-vertica: push finish" }
|
265
|
+
Embulk.logger.debug { "embulk-output-vertica: push finish with finish_timeout:#{@task['finish_timeout']}" }
|
198
266
|
@queue.push('finish')
|
199
267
|
Thread.pass
|
200
|
-
@thread.join
|
268
|
+
@thread.join(@task['finish_timeout'])
|
269
|
+
if @thread.alive?
|
270
|
+
@thread.kill
|
271
|
+
Embulk.logger.error "embulk-output-vertica: finish_timeout #{@task['finish_timeout']}sec exceeded, thread is killed forcely"
|
272
|
+
success = false
|
273
|
+
end
|
201
274
|
else
|
202
|
-
|
275
|
+
Embulk.logger.error "embulk-output-vertica: thread died accidently"
|
276
|
+
success = false
|
203
277
|
end
|
204
278
|
|
205
279
|
task_report = {
|
206
280
|
'num_input_rows' => @num_input_rows,
|
207
281
|
'num_output_rows' => @num_output_rows,
|
208
282
|
'num_rejected_rows' => @num_rejected_rows,
|
283
|
+
'success' => success
|
209
284
|
}
|
210
285
|
end
|
211
286
|
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-vertica
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
- eiji.sekiya
|
8
7
|
- Naotoshi Seo
|
8
|
+
- Eiji Sekiya
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-03-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: jvertica
|
@@ -69,8 +69,8 @@ dependencies:
|
|
69
69
|
version: '10.0'
|
70
70
|
description: Dump records to vertica
|
71
71
|
email:
|
72
|
-
- eiji.sekiya.0326@gmail.com
|
73
72
|
- sonots@gmail.com
|
73
|
+
- eiji.sekiya.0326@gmail.com
|
74
74
|
executables: []
|
75
75
|
extensions: []
|
76
76
|
extra_rdoc_files: []
|
@@ -89,10 +89,11 @@ files:
|
|
89
89
|
- example/json_payload.yml
|
90
90
|
- example/replace.yml
|
91
91
|
- example/resource_pool.yml
|
92
|
+
- example/timeout.yml
|
92
93
|
- lib/embulk/output/vertica.rb
|
93
94
|
- lib/embulk/output/vertica/output_thread.rb
|
94
95
|
- lib/embulk/output/vertica/value_converter_factory.rb
|
95
|
-
homepage: https://github.com/
|
96
|
+
homepage: https://github.com/sonots/embulk-output-vertica
|
96
97
|
licenses:
|
97
98
|
- MIT
|
98
99
|
metadata: {}
|