embulk-output-vertica 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +1 -1
- data/embulk-output-vertica.gemspec +4 -4
- data/example/timeout.yml +41 -0
- data/lib/embulk/output/vertica.rb +5 -2
- data/lib/embulk/output/vertica/output_thread.rb +89 -14
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e89fddffd4579262fb1d91d92a3288c0e8a8ba2
|
4
|
+
data.tar.gz: e5708526eb702e7008e2758126251a4e306e75f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: efa9d97f5d999a983f4d55ffc5cb5413bf1b9da63f825276682e01c14fbdc9541da1277a108b2b639f9960f7e6af21f4b1c7b99894ca51ae1954814e777e5e27
|
7
|
+
data.tar.gz: c4c1b97985f3211d187192b8581f1602109cb2eb9b2d1f58dc05e30e232a7b823e00fd7b76831fea95883b80232e759d632d621892f3c69429612cf3b5e1fe12
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -44,7 +44,7 @@
|
|
44
44
|
### Modes
|
45
45
|
|
46
46
|
* **insert**:
|
47
|
-
* Behavior: This mode copies rows to
|
47
|
+
* Behavior: This mode copies rows to an intermediate table first. If all those tasks run correctly, runs `INSERT INTO <target_table> SELECT * FROM <intermediate_table>`
|
48
48
|
* **replace**:
|
49
49
|
* Behavior: This mode copies rows to an intermediate table first. If all those tasks run correctly, swaps the target table and the intermediate table, and drops the old target table
|
50
50
|
* **drop_insert**:
|
data/embulk-output-vertica.gemspec
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-vertica"
|
3
|
-
spec.version = "0.7.
|
4
|
-
spec.authors = ["
|
5
|
-
spec.email = ["
|
3
|
+
spec.version = "0.7.3"
|
4
|
+
spec.authors = ["Naotoshi Seo", "Eiji Sekiya"]
|
5
|
+
spec.email = ["sonots@gmail.com", "eiji.sekiya.0326@gmail.com"]
|
6
6
|
spec.summary = "Vertica output plugin for Embulk"
|
7
7
|
spec.description = "Dump records to vertica"
|
8
|
-
spec.homepage = "https://github.com/
|
8
|
+
spec.homepage = "https://github.com/sonots/embulk-output-vertica"
|
9
9
|
spec.license = "MIT"
|
10
10
|
|
11
11
|
spec.files = `git ls-files -z`.split("\x0")
|
data/example/timeout.yml
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# in:
|
2
|
+
# type: random
|
3
|
+
# rows: 100
|
4
|
+
# schema:
|
5
|
+
# id: primary_key
|
6
|
+
# name: string
|
7
|
+
# score: integer
|
8
|
+
in:
|
9
|
+
type: file
|
10
|
+
path_prefix: example/example.csv
|
11
|
+
parser:
|
12
|
+
type: csv
|
13
|
+
charset: UTF-8
|
14
|
+
newline: CRLF
|
15
|
+
null_string: 'NULL'
|
16
|
+
skip_header_lines: 1
|
17
|
+
comment_line_marker: '#'
|
18
|
+
columns:
|
19
|
+
- {name: timestamp_date, type: timestamp, format: "%Y-%m-%d", timezone: "+09:00"}
|
20
|
+
- {name: string_date, type: string}
|
21
|
+
- {name: foo, type: string}
|
22
|
+
- {name: bar, type: string}
|
23
|
+
- {name: id, type: long}
|
24
|
+
- {name: name, type: string}
|
25
|
+
- {name: score, type: double}
|
26
|
+
out:
|
27
|
+
type: vertica
|
28
|
+
host: 127.0.0.1
|
29
|
+
user: dbadmin
|
30
|
+
password: xxxxxxx
|
31
|
+
database: vdb
|
32
|
+
schema: sandbox
|
33
|
+
table: embulk_test
|
34
|
+
mode: drop_insert
|
35
|
+
copy_mode: DIRECT
|
36
|
+
abort_on_error: true
|
37
|
+
reject_on_materialized_type_error: true
|
38
|
+
default_timezone: 'Asia/Tokyo'
|
39
|
+
write_timeout: 660
|
40
|
+
dequeue_timeout: 780
|
41
|
+
finish_timeout: 180
|
data/lib/embulk/output/vertica.rb
CHANGED
@@ -14,7 +14,7 @@ module Embulk
|
|
14
14
|
@thread_pool ||= @thread_pool_proc.call
|
15
15
|
end
|
16
16
|
|
17
|
-
def self.transaction(config, schema,
|
17
|
+
def self.transaction(config, schema, task_count, &control)
|
18
18
|
task = {
|
19
19
|
'host' => config.param('host', :string, :default => 'localhost'),
|
20
20
|
'port' => config.param('port', :integer, :default => 5433),
|
@@ -33,7 +33,10 @@ module Embulk
|
|
33
33
|
'json_payload' => config.param('json_payload', :bool, :default => false),
|
34
34
|
'resource_pool' => config.param('resource_pool', :string, :default => nil),
|
35
35
|
'reject_on_materialized_type_error' => config.param('reject_on_materialized_type_error', :bool, :default => false),
|
36
|
-
'pool' => config.param('pool', :integer, :default =>
|
36
|
+
'pool' => config.param('pool', :integer, :default => task_count),
|
37
|
+
'write_timeout' => config.param('write_timeout', :integer, :default => nil), # like 11 * 60 sec
|
38
|
+
'dequeue_timeout' => config.param('dequeue_timeout', :integer, :default => nil), # like 13 * 60 sec
|
39
|
+
'finish_timeout' => config.param('finish_timeout', :integer, :default => nil), # like 3 * 60 sec
|
37
40
|
}
|
38
41
|
|
39
42
|
@thread_pool_proc = Proc.new do
|
data/lib/embulk/output/vertica/output_thread.rb
CHANGED
@@ -3,6 +3,14 @@ require 'zlib'
|
|
3
3
|
module Embulk
|
4
4
|
module Output
|
5
5
|
class Vertica < OutputPlugin
|
6
|
+
class CommitError < ::StandardError; end
|
7
|
+
class TimeoutError < ::Timeout::Error; end
|
8
|
+
class DequeueTimeoutError < TimeoutError; end
|
9
|
+
class FinishTimeoutError < TimeoutError; end
|
10
|
+
class WriteTimeoutError < TimeoutError; end
|
11
|
+
|
12
|
+
$embulk_output_vertica_thread_dumped = false
|
13
|
+
|
6
14
|
class OutputThreadPool
|
7
15
|
def initialize(task, schema, size)
|
8
16
|
@task = task
|
@@ -30,7 +38,14 @@ module Embulk
|
|
30
38
|
end
|
31
39
|
|
32
40
|
def commit
|
33
|
-
|
41
|
+
Embulk.logger.debug "embulk-output-vertica: commit"
|
42
|
+
task_reports = @mutex.synchronize do
|
43
|
+
@size.times.map {|i| @output_threads[i].commit }
|
44
|
+
end
|
45
|
+
unless task_reports.all? {|task_report| task_report['success'] }
|
46
|
+
raise CommitError, "some of output_threads failed to commit"
|
47
|
+
end
|
48
|
+
task_reports
|
34
49
|
end
|
35
50
|
|
36
51
|
def to_json(record)
|
@@ -64,12 +79,26 @@ module Embulk
|
|
64
79
|
end
|
65
80
|
end
|
66
81
|
|
82
|
+
def thread_dump
|
83
|
+
unless $embulk_output_vertica_thread_dumped
|
84
|
+
$embulk_output_vertica_thread_dumped = true
|
85
|
+
Embulk.logger.debug "embulk-output-vertica: kill -3 #{$$} (Thread dump)"
|
86
|
+
begin
|
87
|
+
Process.kill :QUIT, $$
|
88
|
+
rescue SignalException
|
89
|
+
ensure
|
90
|
+
sleep 1
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
67
95
|
def enqueue(json_page)
|
68
96
|
if @thread_active and @thread.alive?
|
69
97
|
Embulk.logger.trace { "embulk-output-vertica: enqueue" }
|
70
98
|
@queue.push(json_page)
|
71
99
|
else
|
72
100
|
Embulk.logger.info { "embulk-output-vertica: thread is dead, but still trying to enqueue" }
|
101
|
+
thread_dump
|
73
102
|
raise RuntimeError, "embulk-output-vertica: thread is died, but still trying to enqueue"
|
74
103
|
end
|
75
104
|
end
|
@@ -93,7 +122,8 @@ module Embulk
|
|
93
122
|
i = 0
|
94
123
|
# split str not to be blocked (max size of pipe buf is 64k bytes on Linux, Mac at default)
|
95
124
|
while substr = str[i, PIPE_BUF]
|
96
|
-
io.write
|
125
|
+
Embulk.logger.trace { "embulk-output-vertica: io.write with write_timeout:#{@task['write_timeout']}" }
|
126
|
+
Timeout.timeout(@task['write_timeout'], WriteTimeoutError) { io.write(substr) }
|
97
127
|
i += PIPE_BUF
|
98
128
|
end
|
99
129
|
end
|
@@ -121,19 +151,24 @@ module Embulk
|
|
121
151
|
# @return [Array] dequeued json_page
|
122
152
|
# @return [String] 'finish' is dequeued to finish
|
123
153
|
def dequeue
|
124
|
-
json_page =
|
154
|
+
json_page = nil
|
155
|
+
Embulk.logger.trace { "embulk-output-vertica: @queue.pop with dequeue_timeout:#{@task['dequeue_timeout']}" }
|
156
|
+
Timeout.timeout(@task['dequeue_timeout'], DequeueTimeoutError) { json_page = @queue.pop }
|
125
157
|
Embulk.logger.trace { "embulk-output-vertica: dequeued" }
|
126
158
|
Embulk.logger.debug { "embulk-output-vertica: dequeued finish" } if json_page == 'finish'
|
127
159
|
json_page
|
128
160
|
end
|
129
161
|
|
130
162
|
def copy(jv, sql, &block)
|
131
|
-
Embulk.logger.debug "embulk-output-vertica:
|
163
|
+
Embulk.logger.debug "embulk-output-vertica: copy, waiting a first message"
|
164
|
+
|
132
165
|
num_output_rows = 0; rejected_row_nums = []; last_record = nil
|
133
166
|
|
134
167
|
json_page = dequeue
|
135
168
|
return [num_output_rows, rejected_row_nums, last_record] if json_page == 'finish'
|
136
169
|
|
170
|
+
Embulk.logger.debug "embulk-output-vertica: #{sql}"
|
171
|
+
|
137
172
|
num_output_rows, rejected_row_nums = jv.copy(sql) do |stdin, stream|
|
138
173
|
@write_proc.call(stdin, json_page) {|record| last_record = record }
|
139
174
|
|
@@ -159,7 +194,8 @@ module Embulk
|
|
159
194
|
|
160
195
|
def run
|
161
196
|
Embulk.logger.debug { "embulk-output-vertica: thread started" }
|
162
|
-
|
197
|
+
begin
|
198
|
+
jv = Vertica.connect(@task)
|
163
199
|
begin
|
164
200
|
num_output_rows, rejected_row_nums, last_record = copy(jv, copy_sql)
|
165
201
|
Embulk.logger.debug { "embulk-output-vertica: thread finished" }
|
@@ -170,20 +206,50 @@ module Embulk
|
|
170
206
|
Embulk.logger.warn "embulk-output-vertica: ROLLBACK! #{rejected_row_nums}"
|
171
207
|
end
|
172
208
|
Embulk.logger.info { "embulk-output-vertica: last_record: #{last_record}" }
|
173
|
-
jv
|
174
|
-
raise e
|
209
|
+
rollback(jv)
|
210
|
+
raise e
|
175
211
|
rescue => e
|
176
|
-
Embulk.logger.warn "embulk-output-vertica: ROLLBACK! #{e.class} #{e.message}"
|
177
|
-
jv
|
212
|
+
Embulk.logger.warn "embulk-output-vertica: ROLLBACK! #{e.class} #{e.message} #{e.backtrace.join("\n ")}"
|
213
|
+
rollback(jv)
|
178
214
|
raise e
|
179
215
|
end
|
216
|
+
ensure
|
217
|
+
close(jv)
|
180
218
|
end
|
181
|
-
rescue => e
|
219
|
+
rescue TimeoutError => e
|
220
|
+
Embulk.logger.error "embulk-output-vertica: UNKNOWN TIMEOUT!! #{e.class}"
|
221
|
+
@thread_active = false # not to be enqueued any more
|
222
|
+
dequeue_all
|
223
|
+
thread_dump
|
224
|
+
exit(1)
|
225
|
+
rescue Exception => e
|
226
|
+
Embulk.logger.error "embulk-output-vertica: UNKNOWN ERROR! #{e.class} #{e.message} #{e.backtrace.join("\n ")}"
|
182
227
|
@thread_active = false # not to be enqueued any more
|
228
|
+
dequeue_all
|
229
|
+
@outer_thread.raise e
|
230
|
+
end
|
231
|
+
|
232
|
+
def dequeue_all
|
233
|
+
Embulk.logger.debug "embulk-output-vertica: dequeue all"
|
183
234
|
while @queue.size > 0
|
184
235
|
@queue.pop # dequeue all because some might be still trying @queue.push and get blocked, need to release
|
185
236
|
end
|
186
|
-
|
237
|
+
end
|
238
|
+
|
239
|
+
def close(jv)
|
240
|
+
begin
|
241
|
+
jv.close
|
242
|
+
rescue java.sql.SQLException => e # The connection is closed
|
243
|
+
Embulk.logger.debug "embulk-output-vertica: #{e.class} #{e.message}"
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
def rollback(jv)
|
248
|
+
begin
|
249
|
+
jv.rollback
|
250
|
+
rescue java.sql.SQLException => e # The connection is closed
|
251
|
+
Embulk.logger.debug "embulk-output-vertica: #{e.class} #{e.message}"
|
252
|
+
end
|
187
253
|
end
|
188
254
|
|
189
255
|
def start
|
@@ -192,20 +258,29 @@ module Embulk
|
|
192
258
|
end
|
193
259
|
|
194
260
|
def commit
|
261
|
+
Embulk.logger.debug "embulk-output-vertica: output_thread commit"
|
195
262
|
@thread_active = false
|
263
|
+
success = true
|
196
264
|
if @thread.alive?
|
197
|
-
Embulk.logger.debug { "embulk-output-vertica: push finish" }
|
265
|
+
Embulk.logger.debug { "embulk-output-vertica: push finish with finish_timeout:#{@task['finish_timeout']}" }
|
198
266
|
@queue.push('finish')
|
199
267
|
Thread.pass
|
200
|
-
@thread.join
|
268
|
+
@thread.join(@task['finish_timeout'])
|
269
|
+
if @thread.alive?
|
270
|
+
@thread.kill
|
271
|
+
Embulk.logger.error "embulk-output-vertica: finish_timeout #{@task['finish_timeout']}sec exceeded, thread is killed forcely"
|
272
|
+
success = false
|
273
|
+
end
|
201
274
|
else
|
202
|
-
|
275
|
+
Embulk.logger.error "embulk-output-vertica: thread died accidently"
|
276
|
+
success = false
|
203
277
|
end
|
204
278
|
|
205
279
|
task_report = {
|
206
280
|
'num_input_rows' => @num_input_rows,
|
207
281
|
'num_output_rows' => @num_output_rows,
|
208
282
|
'num_rejected_rows' => @num_rejected_rows,
|
283
|
+
'success' => success
|
209
284
|
}
|
210
285
|
end
|
211
286
|
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-vertica
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
- eiji.sekiya
|
8
7
|
- Naotoshi Seo
|
8
|
+
- Eiji Sekiya
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-03-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: jvertica
|
@@ -69,8 +69,8 @@ dependencies:
|
|
69
69
|
version: '10.0'
|
70
70
|
description: Dump records to vertica
|
71
71
|
email:
|
72
|
-
- eiji.sekiya.0326@gmail.com
|
73
72
|
- sonots@gmail.com
|
73
|
+
- eiji.sekiya.0326@gmail.com
|
74
74
|
executables: []
|
75
75
|
extensions: []
|
76
76
|
extra_rdoc_files: []
|
@@ -89,10 +89,11 @@ files:
|
|
89
89
|
- example/json_payload.yml
|
90
90
|
- example/replace.yml
|
91
91
|
- example/resource_pool.yml
|
92
|
+
- example/timeout.yml
|
92
93
|
- lib/embulk/output/vertica.rb
|
93
94
|
- lib/embulk/output/vertica/output_thread.rb
|
94
95
|
- lib/embulk/output/vertica/value_converter_factory.rb
|
95
|
-
homepage: https://github.com/
|
96
|
+
homepage: https://github.com/sonots/embulk-output-vertica
|
96
97
|
licenses:
|
97
98
|
- MIT
|
98
99
|
metadata: {}
|