embulk-output-vertica 0.7.2 → 0.7.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e3a0449ef97c75dd1e28f722b344935ba2552952
4
- data.tar.gz: 71983eb9e264edc18535f9e9b75ebd3ef4e3921b
3
+ metadata.gz: 7e89fddffd4579262fb1d91d92a3288c0e8a8ba2
4
+ data.tar.gz: e5708526eb702e7008e2758126251a4e306e75f1
5
5
  SHA512:
6
- metadata.gz: ab6d594e5037f6c12f14a65b7aa0a0e78c1ae1ca18fa79ff5e506cb7aaa86c5e532f2c5eceb76345e6b2d8c99924d1f26a498decd2f2b221875f3d8ba2d97f39
7
- data.tar.gz: ee3105565d94a409ea6c22aa17fbaf3be326c76958cfe1d01e0d657b6d1dcd34e4f934d4ecc94e081eb8048a016006a6e36730c610fcd86fccd368db4deca936
6
+ metadata.gz: efa9d97f5d999a983f4d55ffc5cb5413bf1b9da63f825276682e01c14fbdc9541da1277a108b2b639f9960f7e6af21f4b1c7b99894ca51ae1954814e777e5e27
7
+ data.tar.gz: c4c1b97985f3211d187192b8581f1602109cb2eb9b2d1f58dc05e30e232a7b823e00fd7b76831fea95883b80232e759d632d621892f3c69429612cf3b5e1fe12
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ # 0.7.3 (2016/03/27)
2
+
3
+ Enhancements:
4
+
5
+ * Add write_timeout option
6
+ * Add finish_timeout option
7
+ * Add dequeue_timeout option
8
+
9
+ Changes:
10
+
11
+ * Transfer repository from eratostennis to sonots
12
+
1
13
  # 0.7.2 (2016/02/10)
2
14
 
3
15
  Fixes:
data/README.md CHANGED
@@ -44,7 +44,7 @@
44
44
  ### Modes
45
45
 
46
46
  * **insert**:
47
- * Behavior: This mode copies rows to some intermediate tables first. If all those tasks run correctly, runs INSERT INTO <target_table> SELECT * FROM <intermediate_table>
47
+ * Behavior: This mode copies rows to an intermediate table first. If all those tasks run correctly, runs `INSERT INTO <target_table> SELECT * FROM <intermediate_table>`
48
48
  * **replace**:
49
49
  * Behavior: This mode copies rows to an intermediate table first. If all those tasks run correctly, swaps the target table and the intermediate table, and drops the old target table
50
50
  * **drop_insert**:
@@ -1,11 +1,11 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-vertica"
3
- spec.version = "0.7.2"
4
- spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
5
- spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
3
+ spec.version = "0.7.3"
4
+ spec.authors = ["Naotoshi Seo", "Eiji Sekiya"]
5
+ spec.email = ["sonots@gmail.com", "eiji.sekiya.0326@gmail.com"]
6
6
  spec.summary = "Vertica output plugin for Embulk"
7
7
  spec.description = "Dump records to vertica"
8
- spec.homepage = "https://github.com/eratostennis/embulk-output-vertica"
8
+ spec.homepage = "https://github.com/sonots/embulk-output-vertica"
9
9
  spec.license = "MIT"
10
10
 
11
11
  spec.files = `git ls-files -z`.split("\x0")
@@ -0,0 +1,41 @@
1
+ # in:
2
+ # type: random
3
+ # rows: 100
4
+ # schema:
5
+ # id: primary_key
6
+ # name: string
7
+ # score: integer
8
+ in:
9
+ type: file
10
+ path_prefix: example/example.csv
11
+ parser:
12
+ type: csv
13
+ charset: UTF-8
14
+ newline: CRLF
15
+ null_string: 'NULL'
16
+ skip_header_lines: 1
17
+ comment_line_marker: '#'
18
+ columns:
19
+ - {name: timestamp_date, type: timestamp, format: "%Y-%m-%d", timezone: "+09:00"}
20
+ - {name: string_date, type: string}
21
+ - {name: foo, type: string}
22
+ - {name: bar, type: string}
23
+ - {name: id, type: long}
24
+ - {name: name, type: string}
25
+ - {name: score, type: double}
26
+ out:
27
+ type: vertica
28
+ host: 127.0.0.1
29
+ user: dbadmin
30
+ password: xxxxxxx
31
+ database: vdb
32
+ schema: sandbox
33
+ table: embulk_test
34
+ mode: drop_insert
35
+ copy_mode: DIRECT
36
+ abort_on_error: true
37
+ reject_on_materialized_type_error: true
38
+ default_timezone: 'Asia/Tokyo'
39
+ write_timeout: 660
40
+ dequeue_timeout: 780
41
+ finish_timeout: 180
@@ -14,7 +14,7 @@ module Embulk
14
14
  @thread_pool ||= @thread_pool_proc.call
15
15
  end
16
16
 
17
- def self.transaction(config, schema, processor_count, &control)
17
+ def self.transaction(config, schema, task_count, &control)
18
18
  task = {
19
19
  'host' => config.param('host', :string, :default => 'localhost'),
20
20
  'port' => config.param('port', :integer, :default => 5433),
@@ -33,7 +33,10 @@ module Embulk
33
33
  'json_payload' => config.param('json_payload', :bool, :default => false),
34
34
  'resource_pool' => config.param('resource_pool', :string, :default => nil),
35
35
  'reject_on_materialized_type_error' => config.param('reject_on_materialized_type_error', :bool, :default => false),
36
- 'pool' => config.param('pool', :integer, :default => processor_count),
36
+ 'pool' => config.param('pool', :integer, :default => task_count),
37
+ 'write_timeout' => config.param('write_timeout', :integer, :default => nil), # like 11 * 60 sec
38
+ 'dequeue_timeout' => config.param('dequeue_timeout', :integer, :default => nil), # like 13 * 60 sec
39
+ 'finish_timeout' => config.param('finish_timeout', :integer, :default => nil), # like 3 * 60 sec
37
40
  }
38
41
 
39
42
  @thread_pool_proc = Proc.new do
@@ -3,6 +3,14 @@ require 'zlib'
3
3
  module Embulk
4
4
  module Output
5
5
  class Vertica < OutputPlugin
6
+ class CommitError < ::StandardError; end
7
+ class TimeoutError < ::Timeout::Error; end
8
+ class DequeueTimeoutError < TimeoutError; end
9
+ class FinishTimeoutError < TimeoutError; end
10
+ class WriteTimeoutError < TimeoutError; end
11
+
12
+ $embulk_output_vertica_thread_dumped = false
13
+
6
14
  class OutputThreadPool
7
15
  def initialize(task, schema, size)
8
16
  @task = task
@@ -30,7 +38,14 @@ module Embulk
30
38
  end
31
39
 
32
40
  def commit
33
- task_reports = @size.times.map {|i| @output_threads[i].commit }
41
+ Embulk.logger.debug "embulk-output-vertica: commit"
42
+ task_reports = @mutex.synchronize do
43
+ @size.times.map {|i| @output_threads[i].commit }
44
+ end
45
+ unless task_reports.all? {|task_report| task_report['success'] }
46
+ raise CommitError, "some of output_threads failed to commit"
47
+ end
48
+ task_reports
34
49
  end
35
50
 
36
51
  def to_json(record)
@@ -64,12 +79,26 @@ module Embulk
64
79
  end
65
80
  end
66
81
 
82
+ def thread_dump
83
+ unless $embulk_output_vertica_thread_dumped
84
+ $embulk_output_vertica_thread_dumped = true
85
+ Embulk.logger.debug "embulk-output-vertica: kill -3 #{$$} (Thread dump)"
86
+ begin
87
+ Process.kill :QUIT, $$
88
+ rescue SignalException
89
+ ensure
90
+ sleep 1
91
+ end
92
+ end
93
+ end
94
+
67
95
  def enqueue(json_page)
68
96
  if @thread_active and @thread.alive?
69
97
  Embulk.logger.trace { "embulk-output-vertica: enqueue" }
70
98
  @queue.push(json_page)
71
99
  else
72
100
  Embulk.logger.info { "embulk-output-vertica: thread is dead, but still trying to enqueue" }
101
+ thread_dump
73
102
  raise RuntimeError, "embulk-output-vertica: thread is died, but still trying to enqueue"
74
103
  end
75
104
  end
@@ -93,7 +122,8 @@ module Embulk
93
122
  i = 0
94
123
  # split str not to be blocked (max size of pipe buf is 64k bytes on Linux, Mac at default)
95
124
  while substr = str[i, PIPE_BUF]
96
- io.write(substr)
125
+ Embulk.logger.trace { "embulk-output-vertica: io.write with write_timeout:#{@task['write_timeout']}" }
126
+ Timeout.timeout(@task['write_timeout'], WriteTimeoutError) { io.write(substr) }
97
127
  i += PIPE_BUF
98
128
  end
99
129
  end
@@ -121,19 +151,24 @@ module Embulk
121
151
  # @return [Array] dequeued json_page
122
152
  # @return [String] 'finish' is dequeued to finish
123
153
  def dequeue
124
- json_page = @queue.pop
154
+ json_page = nil
155
+ Embulk.logger.trace { "embulk-output-vertica: @queue.pop with dequeue_timeout:#{@task['dequeue_timeout']}" }
156
+ Timeout.timeout(@task['dequeue_timeout'], DequeueTimeoutError) { json_page = @queue.pop }
125
157
  Embulk.logger.trace { "embulk-output-vertica: dequeued" }
126
158
  Embulk.logger.debug { "embulk-output-vertica: dequeued finish" } if json_page == 'finish'
127
159
  json_page
128
160
  end
129
161
 
130
162
  def copy(jv, sql, &block)
131
- Embulk.logger.debug "embulk-output-vertica: #{sql}"
163
+ Embulk.logger.debug "embulk-output-vertica: copy, waiting a first message"
164
+
132
165
  num_output_rows = 0; rejected_row_nums = []; last_record = nil
133
166
 
134
167
  json_page = dequeue
135
168
  return [num_output_rows, rejected_row_nums, last_record] if json_page == 'finish'
136
169
 
170
+ Embulk.logger.debug "embulk-output-vertica: #{sql}"
171
+
137
172
  num_output_rows, rejected_row_nums = jv.copy(sql) do |stdin, stream|
138
173
  @write_proc.call(stdin, json_page) {|record| last_record = record }
139
174
 
@@ -159,7 +194,8 @@ module Embulk
159
194
 
160
195
  def run
161
196
  Embulk.logger.debug { "embulk-output-vertica: thread started" }
162
- Vertica.connect(@task) do |jv|
197
+ begin
198
+ jv = Vertica.connect(@task)
163
199
  begin
164
200
  num_output_rows, rejected_row_nums, last_record = copy(jv, copy_sql)
165
201
  Embulk.logger.debug { "embulk-output-vertica: thread finished" }
@@ -170,20 +206,50 @@ module Embulk
170
206
  Embulk.logger.warn "embulk-output-vertica: ROLLBACK! #{rejected_row_nums}"
171
207
  end
172
208
  Embulk.logger.info { "embulk-output-vertica: last_record: #{last_record}" }
173
- jv.rollback
174
- raise e # die transaction
209
+ rollback(jv)
210
+ raise e
175
211
  rescue => e
176
- Embulk.logger.warn "embulk-output-vertica: ROLLBACK! #{e.class} #{e.message}"
177
- jv.rollback
212
+ Embulk.logger.warn "embulk-output-vertica: ROLLBACK! #{e.class} #{e.message} #{e.backtrace.join("\n ")}"
213
+ rollback(jv)
178
214
  raise e
179
215
  end
216
+ ensure
217
+ close(jv)
180
218
  end
181
- rescue => e
219
+ rescue TimeoutError => e
220
+ Embulk.logger.error "embulk-output-vertica: UNKNOWN TIMEOUT!! #{e.class}"
221
+ @thread_active = false # not to be enqueued any more
222
+ dequeue_all
223
+ thread_dump
224
+ exit(1)
225
+ rescue Exception => e
226
+ Embulk.logger.error "embulk-output-vertica: UNKNOWN ERROR! #{e.class} #{e.message} #{e.backtrace.join("\n ")}"
182
227
  @thread_active = false # not to be enqueued any more
228
+ dequeue_all
229
+ @outer_thread.raise e
230
+ end
231
+
232
+ def dequeue_all
233
+ Embulk.logger.debug "embulk-output-vertica: dequeue all"
183
234
  while @queue.size > 0
184
235
  @queue.pop # dequeue all because some might be still trying @queue.push and get blocked, need to release
185
236
  end
186
- @outer_thread.raise e.class.new("#{e.message}\n #{e.backtrace.join("\n ")}")
237
+ end
238
+
239
+ def close(jv)
240
+ begin
241
+ jv.close
242
+ rescue java.sql.SQLException => e # The connection is closed
243
+ Embulk.logger.debug "embulk-output-vertica: #{e.class} #{e.message}"
244
+ end
245
+ end
246
+
247
+ def rollback(jv)
248
+ begin
249
+ jv.rollback
250
+ rescue java.sql.SQLException => e # The connection is closed
251
+ Embulk.logger.debug "embulk-output-vertica: #{e.class} #{e.message}"
252
+ end
187
253
  end
188
254
 
189
255
  def start
@@ -192,20 +258,29 @@ module Embulk
192
258
  end
193
259
 
194
260
  def commit
261
+ Embulk.logger.debug "embulk-output-vertica: output_thread commit"
195
262
  @thread_active = false
263
+ success = true
196
264
  if @thread.alive?
197
- Embulk.logger.debug { "embulk-output-vertica: push finish" }
265
+ Embulk.logger.debug { "embulk-output-vertica: push finish with finish_timeout:#{@task['finish_timeout']}" }
198
266
  @queue.push('finish')
199
267
  Thread.pass
200
- @thread.join
268
+ @thread.join(@task['finish_timeout'])
269
+ if @thread.alive?
270
+ @thread.kill
271
+ Embulk.logger.error "embulk-output-vertica: finish_timeout #{@task['finish_timeout']}sec exceeded, thread is killed forcely"
272
+ success = false
273
+ end
201
274
  else
202
- raise RuntimeError, "embulk-output-vertica: thread died accidently"
275
+ Embulk.logger.error "embulk-output-vertica: thread died accidently"
276
+ success = false
203
277
  end
204
278
 
205
279
  task_report = {
206
280
  'num_input_rows' => @num_input_rows,
207
281
  'num_output_rows' => @num_output_rows,
208
282
  'num_rejected_rows' => @num_rejected_rows,
283
+ 'success' => success
209
284
  }
210
285
  end
211
286
 
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-vertica
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: 0.7.3
5
5
  platform: ruby
6
6
  authors:
7
- - eiji.sekiya
8
7
  - Naotoshi Seo
8
+ - Eiji Sekiya
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-02-10 00:00:00.000000000 Z
12
+ date: 2016-03-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: jvertica
@@ -69,8 +69,8 @@ dependencies:
69
69
  version: '10.0'
70
70
  description: Dump records to vertica
71
71
  email:
72
- - eiji.sekiya.0326@gmail.com
73
72
  - sonots@gmail.com
73
+ - eiji.sekiya.0326@gmail.com
74
74
  executables: []
75
75
  extensions: []
76
76
  extra_rdoc_files: []
@@ -89,10 +89,11 @@ files:
89
89
  - example/json_payload.yml
90
90
  - example/replace.yml
91
91
  - example/resource_pool.yml
92
+ - example/timeout.yml
92
93
  - lib/embulk/output/vertica.rb
93
94
  - lib/embulk/output/vertica/output_thread.rb
94
95
  - lib/embulk/output/vertica/value_converter_factory.rb
95
- homepage: https://github.com/eratostennis/embulk-output-vertica
96
+ homepage: https://github.com/sonots/embulk-output-vertica
96
97
  licenses:
97
98
  - MIT
98
99
  metadata: {}