embulk-output-vertica 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/embulk-output-vertica.gemspec +1 -1
- data/lib/embulk/output/vertica/output_thread.rb +57 -30
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e3a0449ef97c75dd1e28f722b344935ba2552952
|
4
|
+
data.tar.gz: 71983eb9e264edc18535f9e9b75ebd3ef4e3921b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab6d594e5037f6c12f14a65b7aa0a0e78c1ae1ca18fa79ff5e506cb7aaa86c5e532f2c5eceb76345e6b2d8c99924d1f26a498decd2f2b221875f3d8ba2d97f39
|
7
|
+
data.tar.gz: ee3105565d94a409ea6c22aa17fbaf3be326c76958cfe1d01e0d657b6d1dcd34e4f934d4ecc94e081eb8048a016006a6e36730c610fcd86fccd368db4deca936
|
data/CHANGELOG.md
CHANGED
data/embulk-output-vertica.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-vertica"
|
3
|
-
spec.version = "0.7.1"
|
3
|
+
spec.version = "0.7.2"
|
4
4
|
spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
|
5
5
|
spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
|
6
6
|
spec.summary = "Vertica output plugin for Embulk"
|
data/lib/embulk/output/vertica/output_thread.rb
CHANGED
@@ -77,13 +77,25 @@ module Embulk
|
|
77
77
|
def write_gzip(io, page, &block)
|
78
78
|
buf = Zlib::Deflate.new
|
79
79
|
write_buf(buf, page, &block)
|
80
|
-
io
|
80
|
+
write_io(io, buf.finish)
|
81
81
|
end
|
82
82
|
|
83
83
|
def write_uncompressed(io, page, &block)
|
84
84
|
buf = ''
|
85
85
|
write_buf(buf, page, &block)
|
86
|
-
io
|
86
|
+
write_io(io, buf)
|
87
|
+
end
|
88
|
+
|
89
|
+
PIPE_BUF = 4096
|
90
|
+
|
91
|
+
def write_io(io, str)
|
92
|
+
str = str.force_encoding('ASCII-8BIT')
|
93
|
+
i = 0
|
94
|
+
# split str not to be blocked (max size of pipe buf is 64k bytes on Linux, Mac at default)
|
95
|
+
while substr = str[i, PIPE_BUF]
|
96
|
+
io.write(substr)
|
97
|
+
i += PIPE_BUF
|
98
|
+
end
|
87
99
|
end
|
88
100
|
|
89
101
|
def write_buf(buf, json_page, &block)
|
@@ -106,42 +118,62 @@ module Embulk
|
|
106
118
|
number.to_s.gsub(/(\d)(?=(\d{3})+(?!\d))/, '\1,')
|
107
119
|
end
|
108
120
|
|
121
|
+
# @return [Array] dequeued json_page
|
122
|
+
# @return [String] 'finish' is dequeued to finish
|
123
|
+
def dequeue
|
124
|
+
json_page = @queue.pop
|
125
|
+
Embulk.logger.trace { "embulk-output-vertica: dequeued" }
|
126
|
+
Embulk.logger.debug { "embulk-output-vertica: dequeued finish" } if json_page == 'finish'
|
127
|
+
json_page
|
128
|
+
end
|
129
|
+
|
130
|
+
def copy(jv, sql, &block)
|
131
|
+
Embulk.logger.debug "embulk-output-vertica: #{sql}"
|
132
|
+
num_output_rows = 0; rejected_row_nums = []; last_record = nil
|
133
|
+
|
134
|
+
json_page = dequeue
|
135
|
+
return [num_output_rows, rejected_row_nums, last_record] if json_page == 'finish'
|
136
|
+
|
137
|
+
num_output_rows, rejected_row_nums = jv.copy(sql) do |stdin, stream|
|
138
|
+
@write_proc.call(stdin, json_page) {|record| last_record = record }
|
139
|
+
|
140
|
+
while true
|
141
|
+
json_page = dequeue
|
142
|
+
break if json_page == 'finish'
|
143
|
+
@write_proc.call(stdin, json_page) {|record| last_record = record }
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
@num_output_rows += num_output_rows
|
148
|
+
@num_rejected_rows += rejected_row_nums.size
|
149
|
+
Embulk.logger.info { "embulk-output-vertica: COMMIT!" }
|
150
|
+
jv.commit
|
151
|
+
Embulk.logger.debug { "embulk-output-vertica: COMMITTED!" }
|
152
|
+
|
153
|
+
if rejected_row_nums.size > 0
|
154
|
+
Embulk.logger.debug { "embulk-output-vertica: rejected_row_nums: #{rejected_row_nums}" }
|
155
|
+
end
|
156
|
+
|
157
|
+
[num_output_rows, rejected_row_nums, last_record]
|
158
|
+
end
|
159
|
+
|
109
160
|
def run
|
110
161
|
Embulk.logger.debug { "embulk-output-vertica: thread started" }
|
111
162
|
Vertica.connect(@task) do |jv|
|
112
163
|
begin
|
113
|
-
last_record = nil
|
114
|
-
num_output_rows, rejects = copy(jv, copy_sql) do |stdin|
|
115
|
-
while json_page = @queue.pop
|
116
|
-
if json_page == 'finish'
|
117
|
-
Embulk.logger.debug { "embulk-output-vertica: popped finish" }
|
118
|
-
break
|
119
|
-
end
|
120
|
-
Embulk.logger.trace { "embulk-output-vertica: dequeued" }
|
121
|
-
|
122
|
-
@write_proc.call(stdin, json_page) do |record|
|
123
|
-
last_record = record
|
124
|
-
end
|
125
|
-
end
|
126
|
-
end
|
164
|
+
num_output_rows, rejected_row_nums, last_record = copy(jv, copy_sql)
|
127
165
|
Embulk.logger.debug { "embulk-output-vertica: thread finished" }
|
128
|
-
num_rejected_rows = rejects.size
|
129
|
-
@num_output_rows += num_output_rows
|
130
|
-
@num_rejected_rows += num_rejected_rows
|
131
|
-
Embulk.logger.info { "embulk-output-vertica: COMMIT!" }
|
132
|
-
jv.commit
|
133
|
-
Embulk.logger.debug { "embulk-output-vertica: COMMITTED!" }
|
134
166
|
rescue java.sql.SQLDataException => e
|
135
167
|
if @task['reject_on_materialized_type_error'] and e.message =~ /Rejected by user-defined parser/
|
136
|
-
Embulk.logger.warn "embulk-output-vertica: ROLLBACK! some of column types and values types do not fit #{
|
168
|
+
Embulk.logger.warn "embulk-output-vertica: ROLLBACK! some of column types and values types do not fit #{rejected_row_nums}"
|
137
169
|
else
|
138
|
-
Embulk.logger.warn "embulk-output-vertica: ROLLBACK!"
|
170
|
+
Embulk.logger.warn "embulk-output-vertica: ROLLBACK! #{rejected_row_nums}"
|
139
171
|
end
|
140
172
|
Embulk.logger.info { "embulk-output-vertica: last_record: #{last_record}" }
|
141
173
|
jv.rollback
|
142
174
|
raise e # die transaction
|
143
175
|
rescue => e
|
144
|
-
Embulk.logger.warn "embulk-output-vertica: ROLLBACK!"
|
176
|
+
Embulk.logger.warn "embulk-output-vertica: ROLLBACK! #{e.class} #{e.message}"
|
145
177
|
jv.rollback
|
146
178
|
raise e
|
147
179
|
end
|
@@ -179,11 +211,6 @@ module Embulk
|
|
179
211
|
|
180
212
|
# private
|
181
213
|
|
182
|
-
def copy(conn, sql, &block)
|
183
|
-
Embulk.logger.debug "embulk-output-vertica: #{sql}"
|
184
|
-
results, rejects = conn.copy(sql, &block)
|
185
|
-
end
|
186
|
-
|
187
214
|
def copy_sql
|
188
215
|
@copy_sql ||= "COPY #{quoted_schema}.#{quoted_temp_table} FROM STDIN#{compress}#{fjsonparser}#{copy_mode}#{abort_on_error} NO COMMIT"
|
189
216
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-vertica
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.1
|
4
|
+
version: 0.7.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- eiji.sekiya
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-02-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: jvertica
|