embulk-output-vertica 0.5.3 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 64254a208edbd6f0c254ed8cabdbee4fac53dbdf
4
- data.tar.gz: 41b73b19cdbc334be205a45a57c2dded37ff3a53
3
+ metadata.gz: fce85ac19d1f8d081c0c35217be4a624a281e242
4
+ data.tar.gz: 35c8a1b4e33cf62653e84e69fb4e611db79f83f1
5
5
  SHA512:
6
- metadata.gz: 3a97e62ed025bfb4b81423db3027a38709946a6900d68976f96fb20772c513852b80315722bbea0e0014c8425312e5b3340170319540e42a3945de511e653498
7
- data.tar.gz: ba2307d15902dafcb306c478f6ec993a411a4eef5a975b14125ae8ca9a73f1c0abe4064bc662124194017ac104172fb960101f3f5182937aefcb550601bd4f48
6
+ metadata.gz: 5d2126b126a0ffb47be5ed8218823096395e23cc9afc3fc67bcc1b0cd5e2ed3a90c17daeedddb4ebf1020972f005dec9288c363b8e2cae88b576845774c8c3e4
7
+ data.tar.gz: c912765ff8a69422e607fe0b6d52ec16ee09c02434277c1be45f5d511470ce89f62a8b28fb80d03b804b2e22995fc91916fc45fddafbe80f97412faab4cffbaf
data/.gitignore CHANGED
@@ -14,4 +14,4 @@
14
14
  mkmf.log
15
15
  vendor
16
16
  .ruby-version
17
- example2.yml
17
+ example2/
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.5.4 (2016/01/21)
2
+
3
+ Enhancements:
4
+
5
+ * Log rejected record
6
+
1
7
  # 0.5.3 (2016/01/09)
2
8
 
3
9
  Enhancements:
data/README.md CHANGED
@@ -20,6 +20,7 @@
20
20
  - **copy_mode**: specifies how data is loaded into the database. See vertica documents for details. (`AUTO`, `DIRECT`, or `TRICKLE`. default: `AUTO`)
21
21
  - **pool**: number of output threads; this controls how many COPY statements are issued concurrently (integer, default: processor_count, that is, the number of threads in the input plugin)
22
22
  - **abort_on_error**: stops the COPY command if a row is rejected and rolls back the command. No data is loaded. (bool, default: false)
23
+ - **compress**: compression applied to the data sent to the COPY statement (`GZIP` or `UNCOMPRESSED`, default: `UNCOMPRESSED`)
23
24
  - **reject_on_materialized_type_error**: uses the `reject_on_materialized_type_error` option for fjsonparser(). This rejects a row if any value's type does not fit its column type, e.g., loading a double value into an INT column fails. See the Vertica documentation for details. (bool, default: false)
24
25
  - **default_timezone**: the default timezone for column_options (string, default is "UTC")
25
26
  - **column_options**: advanced: key-value pairs where each key is a column name and each value is the options for that column.
@@ -72,7 +73,7 @@ Run example:
72
73
 
73
74
  ```
74
75
  $ embulk bundle install --path vendor/bundle
75
- $ embulk -J-O -R--dev run -b . run -l debug example.yml
76
+ $ embulk -J-O -R--dev run -b . -l debug example.yml
76
77
  ```
77
78
 
78
79
  Release gem:
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-vertica"
3
- spec.version = "0.5.3"
3
+ spec.version = "0.5.4"
4
4
  spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
5
5
  spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
6
6
  spec.summary = "Vertica output plugin for Embulk"
data/example/abort.yml ADDED
@@ -0,0 +1,33 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/example.csv
4
+ parser:
5
+ type: csv
6
+ charset: UTF-8
7
+ newline: CRLF
8
+ null_string: 'NULL'
9
+ skip_header_lines: 1
10
+ comment_line_marker: '#'
11
+ columns:
12
+ - {name: timestamp_date, type: timestamp, format: "%Y-%m-%d", timezone: "+09:00"}
13
+ - {name: string_date, type: string}
14
+ - {name: foo, type: string}
15
+ - {name: bar, type: string}
16
+ - {name: id, type: long}
17
+ - {name: name, type: string}
18
+ - {name: score, type: double}
19
+ out:
20
+ type: vertica
21
+ host: 127.0.0.1
22
+ user: dbadmin
23
+ password: xxxxxxx
24
+ database: vdb
25
+ schema: sandbox
26
+ table: embulk_test
27
+ mode: replace
28
+ copy_mode: DIRECT
29
+ abort_on_error: true
30
+ reject_on_materialized_type_error: true
31
+ default_timezone: 'Asia/Tokyo'
32
+ column_options:
33
+ bar: {type: INT}
File without changes
@@ -7,7 +7,7 @@
7
7
  # score: integer
8
8
  in:
9
9
  type: file
10
- path_prefix: example.csv
10
+ path_prefix: example/example.csv
11
11
  parser:
12
12
  type: csv
13
13
  charset: UTF-8
@@ -1,3 +1,5 @@
1
+ require 'zlib'
2
+
1
3
  module Embulk
2
4
  module Output
3
5
  class Vertica < OutputPlugin
@@ -37,6 +39,13 @@ module Embulk
37
39
  @num_rejected_rows = 0
38
40
  @outer_thread = Thread.current
39
41
  @thread_active = false
42
+
43
+ case task['compress']
44
+ when 'GZIP'
45
+ @write_proc = self.method(:write_gzip)
46
+ else
47
+ @write_proc = self.method(:write_uncompressed)
48
+ end
40
49
  end
41
50
 
42
51
  def enqueue(page)
@@ -49,28 +58,49 @@ module Embulk
49
58
  end
50
59
  end
51
60
 
61
+ def write_gzip(io, page, &block)
62
+ buf = Zlib::Deflate.new
63
+ write_buf(buf, page, &block)
64
+ io << buf.finish
65
+ end
66
+
67
+ def write_uncompressed(io, page, &block)
68
+ buf = ''
69
+ write_buf(buf, page, &block)
70
+ io << buf
71
+ end
72
+
73
+ def write_buf(buf, page, &block)
74
+ page.each do |record|
75
+ yield(record) if block_given?
76
+ Embulk.logger.trace { "embulk-output-vertica: record #{record}" }
77
+ json = to_json(record)
78
+ Embulk.logger.trace { "embulk-output-vertica: to_json #{json}" }
79
+ buf << json << "\n"
80
+ @num_input_rows += 1
81
+ end
82
+ end
83
+
52
84
  def run
53
85
  Embulk.logger.debug { "embulk-output-vertica: thread started" }
54
86
  Vertica.connect(@task) do |jv|
55
87
  json = nil # for log
56
88
  begin
89
+ last_record = nil
57
90
  num_output_rows, rejects = copy(jv, copy_sql) do |stdin|
58
91
  while page = @queue.pop
59
92
  if page == 'finish'
60
- Embulk.logger.debug { "embulk-output-vertica: thread finished" }
93
+ Embulk.logger.trace { "embulk-output-vertica: popped finish" }
61
94
  break
62
95
  end
63
96
  Embulk.logger.trace { "embulk-output-vertica: dequeued" }
64
97
 
65
- page.each do |record|
66
- Embulk.logger.trace { "embulk-output-vertica: record #{record}" }
67
- json = to_json(record)
68
- Embulk.logger.trace { "embulk-output-vertica: to_json #{json}" }
69
- stdin << json << "\n"
70
- @num_input_rows += 1
98
+ @write_proc.call(stdin, page) do |record|
99
+ last_record = record
71
100
  end
72
101
  end
73
102
  end
103
+ Embulk.logger.debug { "embulk-output-vertica: thread finished" }
74
104
  num_rejected_rows = rejects.size
75
105
  @num_output_rows += num_output_rows
76
106
  @num_rejected_rows += num_rejected_rows
@@ -83,6 +113,7 @@ module Embulk
83
113
  else
84
114
  Embulk.logger.warn "embulk-output-vertica: ROLLBACK!"
85
115
  end
116
+ Embulk.logger.info { "embulk-output-vertica: last_record: #{to_json(last_record)}" }
86
117
  jv.rollback
87
118
  raise e # die transaction
88
119
  rescue => e
@@ -108,6 +139,7 @@ module Embulk
108
139
  @thread_active = false
109
140
  if @thread.alive?
110
141
  @queue.push('finish')
142
+ Embulk.logger.trace { "embulk-output-vertica: pushed finish" }
111
143
  Thread.pass
112
144
  @thread.join
113
145
  else
@@ -129,7 +161,7 @@ module Embulk
129
161
  end
130
162
 
131
163
  def copy_sql
132
- @copy_sql ||= "COPY #{quoted_schema}.#{quoted_temp_table} FROM STDIN#{fjsonparser}#{copy_mode}#{abort_on_error} NO COMMIT"
164
+ @copy_sql ||= "COPY #{quoted_schema}.#{quoted_temp_table} FROM STDIN#{compress}#{fjsonparser}#{copy_mode}#{abort_on_error} NO COMMIT"
133
165
  end
134
166
 
135
167
  def to_json(record)
@@ -150,6 +182,10 @@ module Embulk
150
182
  ::Jvertica.quote_identifier(@task['temp_table'])
151
183
  end
152
184
 
185
+ def compress
186
+ " #{@task['compress']}"
187
+ end
188
+
153
189
  def copy_mode
154
190
  " #{@task['copy_mode']}"
155
191
  end
@@ -27,6 +27,7 @@ module Embulk
27
27
  'mode' => config.param('mode', :string, :default => 'insert'),
28
28
  'copy_mode' => config.param('copy_mode', :string, :default => 'AUTO'),
29
29
  'abort_on_error' => config.param('abort_on_error', :bool, :default => false),
30
+ 'compress' => config.param('compress', :string, :default => 'UNCOMPRESSED'),
30
31
  'default_timezone' => config.param('default_timezone', :string, :default => 'UTC'),
31
32
  'column_options' => config.param('column_options', :hash, :default => {}),
32
33
  'reject_on_materialized_type_error' => config.param('reject_on_materialized_type_error', :bool, :default => false),
@@ -52,6 +53,12 @@ module Embulk
52
53
  raise ConfigError.new "`copy_mode` must be one of AUTO, DIRECT, TRICKLE"
53
54
  end
54
55
 
56
+ # ToDo: Support BZIP, LZO
57
+ task['compress'] = task['compress'].upcase
58
+ unless %w[GZIP UNCOMPRESSED].include?(task['compress'])
59
+ raise ConfigError.new "`compress` must be one of GZIP, UNCOMPRESSED"
60
+ end
61
+
55
62
  now = Time.now
56
63
  unique_name = "%08x%08x" % [now.tv_sec, now.tv_nsec]
57
64
  task['temp_table'] = "#{task['table']}_LOAD_TEMP_#{unique_name}"
metadata CHANGED
@@ -1,72 +1,72 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-vertica
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - eiji.sekiya
8
8
  - Naotoshi Seo
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-01-09 00:00:00.000000000 Z
12
+ date: 2016-01-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: jvertica
16
- version_requirements: !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
18
  - - "~>"
19
19
  - !ruby/object:Gem::Version
20
20
  version: '0.2'
21
- requirement: !ruby/object:Gem::Requirement
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
22
24
  requirements:
23
25
  - - "~>"
24
26
  - !ruby/object:Gem::Version
25
27
  version: '0.2'
26
- prerelease: false
27
- type: :runtime
28
28
  - !ruby/object:Gem::Dependency
29
29
  name: tzinfo
30
- version_requirements: !ruby/object:Gem::Requirement
30
+ requirement: !ruby/object:Gem::Requirement
31
31
  requirements:
32
32
  - - ">="
33
33
  - !ruby/object:Gem::Version
34
34
  version: '0'
35
- requirement: !ruby/object:Gem::Requirement
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
36
38
  requirements:
37
39
  - - ">="
38
40
  - !ruby/object:Gem::Version
39
41
  version: '0'
40
- prerelease: false
41
- type: :runtime
42
42
  - !ruby/object:Gem::Dependency
43
43
  name: bundler
44
- version_requirements: !ruby/object:Gem::Requirement
44
+ requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
46
  - - "~>"
47
47
  - !ruby/object:Gem::Version
48
48
  version: '1.7'
49
- requirement: !ruby/object:Gem::Requirement
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
50
52
  requirements:
51
53
  - - "~>"
52
54
  - !ruby/object:Gem::Version
53
55
  version: '1.7'
54
- prerelease: false
55
- type: :development
56
56
  - !ruby/object:Gem::Dependency
57
57
  name: rake
58
- version_requirements: !ruby/object:Gem::Requirement
58
+ requirement: !ruby/object:Gem::Requirement
59
59
  requirements:
60
60
  - - "~>"
61
61
  - !ruby/object:Gem::Version
62
62
  version: '10.0'
63
- requirement: !ruby/object:Gem::Requirement
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
64
66
  requirements:
65
67
  - - "~>"
66
68
  - !ruby/object:Gem::Version
67
69
  version: '10.0'
68
- prerelease: false
69
- type: :development
70
70
  description: Dump records to vertica
71
71
  email:
72
72
  - eiji.sekiya.0326@gmail.com
@@ -82,8 +82,9 @@ files:
82
82
  - README.md
83
83
  - Rakefile
84
84
  - embulk-output-vertica.gemspec
85
- - example.csv
86
- - example.yml
85
+ - example/abort.yml
86
+ - example/example.csv
87
+ - example/example.yml
87
88
  - lib/embulk/output/vertica.rb
88
89
  - lib/embulk/output/vertica/output_thread.rb
89
90
  - lib/embulk/output/vertica/value_converter_factory.rb
@@ -91,7 +92,7 @@ homepage: https://github.com/eratostennis/embulk-output-vertica
91
92
  licenses:
92
93
  - MIT
93
94
  metadata: {}
94
- post_install_message:
95
+ post_install_message:
95
96
  rdoc_options: []
96
97
  require_paths:
97
98
  - lib
@@ -106,9 +107,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
106
107
  - !ruby/object:Gem::Version
107
108
  version: '0'
108
109
  requirements: []
109
- rubyforge_project:
110
- rubygems_version: 2.4.8
111
- signing_key:
110
+ rubyforge_project:
111
+ rubygems_version: 2.5.1
112
+ signing_key:
112
113
  specification_version: 4
113
114
  summary: Vertica output plugin for Embulk
114
115
  test_files: []