embulk-output-vertica 0.5.3 → 0.5.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/CHANGELOG.md +6 -0
- data/README.md +2 -1
- data/embulk-output-vertica.gemspec +1 -1
- data/example/abort.yml +33 -0
- data/{example.csv → example/example.csv} +0 -0
- data/{example.yml → example/example.yml} +1 -1
- data/lib/embulk/output/vertica/output_thread.rb +44 -8
- data/lib/embulk/output/vertica.rb +7 -0
- metadata +26 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fce85ac19d1f8d081c0c35217be4a624a281e242
|
4
|
+
data.tar.gz: 35c8a1b4e33cf62653e84e69fb4e611db79f83f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d2126b126a0ffb47be5ed8218823096395e23cc9afc3fc67bcc1b0cd5e2ed3a90c17daeedddb4ebf1020972f005dec9288c363b8e2cae88b576845774c8c3e4
|
7
|
+
data.tar.gz: c912765ff8a69422e607fe0b6d52ec16ee09c02434277c1be45f5d511470ce89f62a8b28fb80d03b804b2e22995fc91916fc45fddafbe80f97412faab4cffbaf
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -20,6 +20,7 @@
|
|
20
20
|
- **copy_mode**: specifies how data is loaded into the database. See vertica documents for details. (`AUTO`, `DIRECT`, or `TRICKLE`. default: `AUTO`)
|
21
21
|
- **pool**: number of output threads, this number controls number of concurrency to issue COPY statements (integer, default: processor_count, that is, number of threads in input plugin)
|
22
22
|
- **abort_on_error**: stops the COPY command if a row is rejected and rolls back the command. No data is loaded. (bool, default: false)
|
23
|
+
- **compress**: compress input (`GZIP`, or `UNCOMPRESSED`, default: `UNCOMPRESSED`)
|
23
24
|
- **reject_on_materialized_type_error**: uses `reject_on_materialized_type_error` option for fjsonparser(). This rejects rows if any of column types and value types do not fit, ex) double value into INT column fails. See vertica documents for details. (bool, default: false)
|
24
25
|
- **default_timezone**: the default timezone for column_options (string, default is "UTC")
|
25
26
|
- **column_options**: advanced: a key-value pairs where key is a column name and value is options for the column.
|
@@ -72,7 +73,7 @@ Run example:
|
|
72
73
|
|
73
74
|
```
|
74
75
|
$ embulk bundle install --path vendor/bundle
|
75
|
-
$ embulk -J-O -R--dev run -b .
|
76
|
+
$ embulk -J-O -R--dev run -b . -l debug example.yml
|
76
77
|
```
|
77
78
|
|
78
79
|
Release gem:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "embulk-output-vertica"
|
3
|
-
spec.version = "0.5.3"
|
3
|
+
spec.version = "0.5.4"
|
4
4
|
spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
|
5
5
|
spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
|
6
6
|
spec.summary = "Vertica output plugin for Embulk"
|
data/example/abort.yml
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/example.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
charset: UTF-8
|
7
|
+
newline: CRLF
|
8
|
+
null_string: 'NULL'
|
9
|
+
skip_header_lines: 1
|
10
|
+
comment_line_marker: '#'
|
11
|
+
columns:
|
12
|
+
- {name: timestamp_date, type: timestamp, format: "%Y-%m-%d", timezone: "+09:00"}
|
13
|
+
- {name: string_date, type: string}
|
14
|
+
- {name: foo, type: string}
|
15
|
+
- {name: bar, type: string}
|
16
|
+
- {name: id, type: long}
|
17
|
+
- {name: name, type: string}
|
18
|
+
- {name: score, type: double}
|
19
|
+
out:
|
20
|
+
type: vertica
|
21
|
+
host: 127.0.0.1
|
22
|
+
user: dbadmin
|
23
|
+
password: xxxxxxx
|
24
|
+
database: vdb
|
25
|
+
schema: sandbox
|
26
|
+
table: embulk_test
|
27
|
+
mode: replace
|
28
|
+
copy_mode: DIRECT
|
29
|
+
abort_on_error: true
|
30
|
+
reject_on_materialized_type_error: true
|
31
|
+
default_timezone: 'Asia/Tokyo'
|
32
|
+
column_options:
|
33
|
+
bar: {type: INT}
|
File without changes
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
|
1
3
|
module Embulk
|
2
4
|
module Output
|
3
5
|
class Vertica < OutputPlugin
|
@@ -37,6 +39,13 @@ module Embulk
|
|
37
39
|
@num_rejected_rows = 0
|
38
40
|
@outer_thread = Thread.current
|
39
41
|
@thread_active = false
|
42
|
+
|
43
|
+
case task['compress']
|
44
|
+
when 'GZIP'
|
45
|
+
@write_proc = self.method(:write_gzip)
|
46
|
+
else
|
47
|
+
@write_proc = self.method(:write_uncompressed)
|
48
|
+
end
|
40
49
|
end
|
41
50
|
|
42
51
|
def enqueue(page)
|
@@ -49,28 +58,49 @@ module Embulk
|
|
49
58
|
end
|
50
59
|
end
|
51
60
|
|
61
|
+
def write_gzip(io, page, &block)
|
62
|
+
buf = Zlib::Deflate.new
|
63
|
+
write_buf(buf, page, &block)
|
64
|
+
io << buf.finish
|
65
|
+
end
|
66
|
+
|
67
|
+
def write_uncompressed(io, page, &block)
|
68
|
+
buf = ''
|
69
|
+
write_buf(buf, page, &block)
|
70
|
+
io << buf
|
71
|
+
end
|
72
|
+
|
73
|
+
def write_buf(buf, page, &block)
|
74
|
+
page.each do |record|
|
75
|
+
yield(record) if block_given?
|
76
|
+
Embulk.logger.trace { "embulk-output-vertica: record #{record}" }
|
77
|
+
json = to_json(record)
|
78
|
+
Embulk.logger.trace { "embulk-output-vertica: to_json #{json}" }
|
79
|
+
buf << json << "\n"
|
80
|
+
@num_input_rows += 1
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
52
84
|
def run
|
53
85
|
Embulk.logger.debug { "embulk-output-vertica: thread started" }
|
54
86
|
Vertica.connect(@task) do |jv|
|
55
87
|
json = nil # for log
|
56
88
|
begin
|
89
|
+
last_record = nil
|
57
90
|
num_output_rows, rejects = copy(jv, copy_sql) do |stdin|
|
58
91
|
while page = @queue.pop
|
59
92
|
if page == 'finish'
|
60
|
-
Embulk.logger.
|
93
|
+
Embulk.logger.trace { "embulk-output-vertica: popped finish" }
|
61
94
|
break
|
62
95
|
end
|
63
96
|
Embulk.logger.trace { "embulk-output-vertica: dequeued" }
|
64
97
|
|
65
|
-
|
66
|
-
|
67
|
-
json = to_json(record)
|
68
|
-
Embulk.logger.trace { "embulk-output-vertica: to_json #{json}" }
|
69
|
-
stdin << json << "\n"
|
70
|
-
@num_input_rows += 1
|
98
|
+
@write_proc.call(stdin, page) do |record|
|
99
|
+
last_record = record
|
71
100
|
end
|
72
101
|
end
|
73
102
|
end
|
103
|
+
Embulk.logger.debug { "embulk-output-vertica: thread finished" }
|
74
104
|
num_rejected_rows = rejects.size
|
75
105
|
@num_output_rows += num_output_rows
|
76
106
|
@num_rejected_rows += num_rejected_rows
|
@@ -83,6 +113,7 @@ module Embulk
|
|
83
113
|
else
|
84
114
|
Embulk.logger.warn "embulk-output-vertica: ROLLBACK!"
|
85
115
|
end
|
116
|
+
Embulk.logger.info { "embulk-output-vertica: last_record: #{to_json(last_record)}" }
|
86
117
|
jv.rollback
|
87
118
|
raise e # die transaction
|
88
119
|
rescue => e
|
@@ -108,6 +139,7 @@ module Embulk
|
|
108
139
|
@thread_active = false
|
109
140
|
if @thread.alive?
|
110
141
|
@queue.push('finish')
|
142
|
+
Embulk.logger.trace { "embulk-output-vertica: pushed finish" }
|
111
143
|
Thread.pass
|
112
144
|
@thread.join
|
113
145
|
else
|
@@ -129,7 +161,7 @@ module Embulk
|
|
129
161
|
end
|
130
162
|
|
131
163
|
def copy_sql
|
132
|
-
@copy_sql ||= "COPY #{quoted_schema}.#{quoted_temp_table} FROM STDIN#{fjsonparser}#{copy_mode}#{abort_on_error} NO COMMIT"
|
164
|
+
@copy_sql ||= "COPY #{quoted_schema}.#{quoted_temp_table} FROM STDIN#{compress}#{fjsonparser}#{copy_mode}#{abort_on_error} NO COMMIT"
|
133
165
|
end
|
134
166
|
|
135
167
|
def to_json(record)
|
@@ -150,6 +182,10 @@ module Embulk
|
|
150
182
|
::Jvertica.quote_identifier(@task['temp_table'])
|
151
183
|
end
|
152
184
|
|
185
|
+
def compress
|
186
|
+
" #{@task['compress']}"
|
187
|
+
end
|
188
|
+
|
153
189
|
def copy_mode
|
154
190
|
" #{@task['copy_mode']}"
|
155
191
|
end
|
@@ -27,6 +27,7 @@ module Embulk
|
|
27
27
|
'mode' => config.param('mode', :string, :default => 'insert'),
|
28
28
|
'copy_mode' => config.param('copy_mode', :string, :default => 'AUTO'),
|
29
29
|
'abort_on_error' => config.param('abort_on_error', :bool, :default => false),
|
30
|
+
'compress' => config.param('compress', :string, :default => 'UNCOMPRESSED'),
|
30
31
|
'default_timezone' => config.param('default_timezone', :string, :default => 'UTC'),
|
31
32
|
'column_options' => config.param('column_options', :hash, :default => {}),
|
32
33
|
'reject_on_materialized_type_error' => config.param('reject_on_materialized_type_error', :bool, :default => false),
|
@@ -52,6 +53,12 @@ module Embulk
|
|
52
53
|
raise ConfigError.new "`copy_mode` must be one of AUTO, DIRECT, TRICKLE"
|
53
54
|
end
|
54
55
|
|
56
|
+
# ToDo: Support BZIP, LZO
|
57
|
+
task['compress'] = task['compress'].upcase
|
58
|
+
unless %w[GZIP UNCOMPRESSED].include?(task['compress'])
|
59
|
+
raise ConfigError.new "`compress` must be one of GZIP, UNCOMPRESSED"
|
60
|
+
end
|
61
|
+
|
55
62
|
now = Time.now
|
56
63
|
unique_name = "%08x%08x" % [now.tv_sec, now.tv_nsec]
|
57
64
|
task['temp_table'] = "#{task['table']}_LOAD_TEMP_#{unique_name}"
|
metadata
CHANGED
@@ -1,72 +1,72 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-vertica
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.3
|
4
|
+
version: 0.5.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- eiji.sekiya
|
8
8
|
- Naotoshi Seo
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-01-
|
12
|
+
date: 2016-01-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: jvertica
|
16
|
-
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
18
|
- - "~>"
|
19
19
|
- !ruby/object:Gem::Version
|
20
20
|
version: '0.2'
|
21
|
-
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
24
|
requirements:
|
23
25
|
- - "~>"
|
24
26
|
- !ruby/object:Gem::Version
|
25
27
|
version: '0.2'
|
26
|
-
prerelease: false
|
27
|
-
type: :runtime
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: tzinfo
|
30
|
-
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
32
|
- - ">="
|
33
33
|
- !ruby/object:Gem::Version
|
34
34
|
version: '0'
|
35
|
-
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
36
38
|
requirements:
|
37
39
|
- - ">="
|
38
40
|
- !ruby/object:Gem::Version
|
39
41
|
version: '0'
|
40
|
-
prerelease: false
|
41
|
-
type: :runtime
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: bundler
|
44
|
-
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
46
|
- - "~>"
|
47
47
|
- !ruby/object:Gem::Version
|
48
48
|
version: '1.7'
|
49
|
-
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
52
|
requirements:
|
51
53
|
- - "~>"
|
52
54
|
- !ruby/object:Gem::Version
|
53
55
|
version: '1.7'
|
54
|
-
prerelease: false
|
55
|
-
type: :development
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
57
|
name: rake
|
58
|
-
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
59
|
requirements:
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
62
|
version: '10.0'
|
63
|
-
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
64
66
|
requirements:
|
65
67
|
- - "~>"
|
66
68
|
- !ruby/object:Gem::Version
|
67
69
|
version: '10.0'
|
68
|
-
prerelease: false
|
69
|
-
type: :development
|
70
70
|
description: Dump records to vertica
|
71
71
|
email:
|
72
72
|
- eiji.sekiya.0326@gmail.com
|
@@ -82,8 +82,9 @@ files:
|
|
82
82
|
- README.md
|
83
83
|
- Rakefile
|
84
84
|
- embulk-output-vertica.gemspec
|
85
|
-
- example.csv
|
86
|
-
- example.yml
|
85
|
+
- example/abort.yml
|
86
|
+
- example/example.csv
|
87
|
+
- example/example.yml
|
87
88
|
- lib/embulk/output/vertica.rb
|
88
89
|
- lib/embulk/output/vertica/output_thread.rb
|
89
90
|
- lib/embulk/output/vertica/value_converter_factory.rb
|
@@ -91,7 +92,7 @@ homepage: https://github.com/eratostennis/embulk-output-vertica
|
|
91
92
|
licenses:
|
92
93
|
- MIT
|
93
94
|
metadata: {}
|
94
|
-
post_install_message:
|
95
|
+
post_install_message:
|
95
96
|
rdoc_options: []
|
96
97
|
require_paths:
|
97
98
|
- lib
|
@@ -106,9 +107,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
106
107
|
- !ruby/object:Gem::Version
|
107
108
|
version: '0'
|
108
109
|
requirements: []
|
109
|
-
rubyforge_project:
|
110
|
-
rubygems_version: 2.
|
111
|
-
signing_key:
|
110
|
+
rubyforge_project:
|
111
|
+
rubygems_version: 2.5.1
|
112
|
+
signing_key:
|
112
113
|
specification_version: 4
|
113
114
|
summary: Vertica output plugin for Embulk
|
114
115
|
test_files: []
|