embulk-output-vertica 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2db4b28e6259edb2d4cacc084586ec82c876ccac
4
- data.tar.gz: bbfe10034e08509f04e259293bde211543cdd1f1
3
+ metadata.gz: b178cb9ffe687570117d6aa8edfb55e7c77c446d
4
+ data.tar.gz: 72195fdd08f4929162602381bcd79432545067c0
5
5
  SHA512:
6
- metadata.gz: 8ad723dfc4972dcc125026d10098b572baeac9f33e384a5f560674f4c322d34185e168723584e287e40461975d7f0fcd7856b2c7fd4f7d03ef1e506126d74281
7
- data.tar.gz: f5b89412688b7609d2a54e2878d469ec946204df22f423d3cf0108b26324559f2791a9509192981f6c937cff8221e948bbb883253a2da396e2ec8f145125a06e
6
+ metadata.gz: 3f132dba10f45571d856cf33478b80f73c154413dd78268a43510461f24f223b04def25cada97e204b3053c6d2ebd09054a6d08816bfe4d26cf102b908ff5bc4
7
+ data.tar.gz: 10fcde36ab57437e724b4e0eb63586fcb218b300d799875777b15ba4769e9e3402dc85cc3b77f882897fdc73515e258b2ce3e57261d79eb98d921b8bab226b7f
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.4.0 (2015/11/24)
2
+
3
+ Enhancements:
4
+
5
+ * Support connection pool
6
+
1
7
  # 0.3.1 (2015/11/20)
2
8
 
3
9
  Fixes:
data/README.md CHANGED
@@ -18,6 +18,8 @@
18
18
  - **table**: table name (string, required)
19
19
  - **mode**: "insert", or "replace". See below. (string, default: insert)
20
20
  - **copy_mode**: specifies how data is loaded into the database. See vertica documents for details. (`AUTO`, `DIRECT`, or `TRICKLE`. default: `AUTO`)
21
+ - **pool**: number of connections in the connection pool; this controls how many COPY statements can be issued concurrently (integer, default: processor_count, that is, the number of threads in the input plugin)
22
+ - **pool_timeout**: timeout for checking out a connection from the connection pool (seconds, default: 600)
21
23
  - **abort_on_error**: stops the COPY command if a row is rejected and rolls back the command. No data is loaded. (bool, default: false)
22
24
  - **reject_on_materialized_type_error**: uses the `reject_on_materialized_type_error` option for fjsonparser(). This rejects rows if any column type and value type do not fit, e.g., loading a double value into an INT column fails. See vertica documents for details. (bool, default: false)
23
25
  - **default_timezone**: the default timezone for column_options (string, default is "UTC")
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-vertica"
3
- spec.version = "0.3.1"
3
+ spec.version = "0.4.0"
4
4
  spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
5
5
  spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
6
6
  spec.summary = "Vertica output plugin for Embulk"
@@ -15,6 +15,7 @@ Gem::Specification.new do |spec|
15
15
 
16
16
  spec.add_dependency "jvertica", "~> 0.2"
17
17
  spec.add_dependency "tzinfo"
18
+ spec.add_dependency "connection_pool"
18
19
  spec.add_development_dependency "bundler", "~> 1.7"
19
20
  spec.add_development_dependency "rake", "~> 10.0"
20
21
  end
@@ -1,4 +1,5 @@
1
1
  require 'jvertica'
2
+ require 'connection_pool'
2
3
  require_relative 'vertica/value_converter_factory'
3
4
 
4
5
  module Embulk
@@ -9,23 +10,42 @@ module Embulk
9
10
  class Error < StandardError; end
10
11
  class NotSupportedType < Error; end
11
12
 
13
+ def self.connection_pool
14
+ @connection_pool ||= @connection_pool_proc.call
15
+ end
16
+
12
17
  def self.transaction(config, schema, processor_count, &control)
13
18
  task = {
14
- 'host' => config.param('host', :string, :default => 'localhost'),
15
- 'port' => config.param('port', :integer, :default => 5433),
16
- 'user' => config.param('user', :string, :default => nil),
17
- 'username' => config.param('username', :string, :default => nil), # alias to :user for backward compatibility
18
- 'password' => config.param('password', :string, :default => ''),
19
- 'database' => config.param('database', :string, :default => 'vdb'),
20
- 'schema' => config.param('schema', :string, :default => 'public'),
21
- 'table' => config.param('table', :string),
22
- 'mode' => config.param('mode', :string, :default => 'insert'),
23
- 'copy_mode' => config.param('copy_mode', :string, :default => 'AUTO'),
24
- 'abort_on_error' => config.param('abort_on_error', :bool, :default => false),
25
- 'default_timezone' => config.param('default_timezone', :string, :default => 'UTC'),
26
- 'column_options' => config.param('column_options', :hash, :default => {}),
19
+ 'host' => config.param('host', :string, :default => 'localhost'),
20
+ 'port' => config.param('port', :integer, :default => 5433),
21
+ 'user' => config.param('user', :string, :default => nil),
22
+ 'username' => config.param('username', :string, :default => nil), # alias to :user for backward compatibility
23
+ 'password' => config.param('password', :string, :default => ''),
24
+ 'database' => config.param('database', :string, :default => 'vdb'),
25
+ 'schema' => config.param('schema', :string, :default => 'public'),
26
+ 'table' => config.param('table', :string),
27
+ 'mode' => config.param('mode', :string, :default => 'insert'),
28
+ 'copy_mode' => config.param('copy_mode', :string, :default => 'AUTO'),
29
+ 'abort_on_error' => config.param('abort_on_error', :bool, :default => false),
30
+ 'default_timezone' => config.param('default_timezone', :string, :default => 'UTC'),
31
+ 'column_options' => config.param('column_options', :hash, :default => {}),
27
32
  'reject_on_materialized_type_error' => config.param('reject_on_materialized_type_error', :bool, :default => false),
33
+ 'pool' => config.param('pool', :integer, :default => processor_count),
34
+ 'pool_timeout' => config.param('pool_timeout', :integer, :default => 600),
28
35
  }
36
+ task['user'] ||= task['username']
37
+
38
+ @connection_pool_proc = Proc.new do
39
+ ConnectionPool.new(size: task['pool'], timeout: task['pool_timeout']) do
40
+ ::Jvertica.connect({
41
+ host: task['host'],
42
+ port: task['port'],
43
+ user: task['user'],
44
+ password: task['password'],
45
+ database: task['database'],
46
+ })
47
+ end
48
+ end
29
49
 
30
50
  task['user'] ||= task['username']
31
51
  unless task['user']
@@ -53,7 +73,7 @@ module Embulk
53
73
  sql_schema_table = self.sql_schema_from_embulk_schema(schema, task['column_options'])
54
74
 
55
75
  # create the target table
56
- connect(task) do |jv|
76
+ connection_pool.with do |jv|
57
77
  query(jv, %[DROP TABLE IF EXISTS #{quoted_schema}.#{quoted_table}]) if task['mode'] == 'REPLACE'
58
78
  query(jv, %[CREATE TABLE IF NOT EXISTS #{quoted_schema}.#{quoted_table} (#{sql_schema_table})])
59
79
  end
@@ -61,7 +81,7 @@ module Embulk
61
81
  sql_schema_temp_table = self.sql_schema_from_table(task)
62
82
 
63
83
  # create a temp table
64
- connect(task) do |jv|
84
+ connection_pool.with do |jv|
65
85
  query(jv, %[DROP TABLE IF EXISTS #{quoted_schema}.#{quoted_temp_table}])
66
86
  query(jv, %[CREATE TABLE #{quoted_schema}.#{quoted_temp_table} (#{sql_schema_temp_table})])
67
87
  end
@@ -69,18 +89,29 @@ module Embulk
69
89
  begin
70
90
  # insert data into the temp table
71
91
  task_reports = yield(task) # obtain an array of task_reports where one report is of a task
92
+ connection_pool.shutdown do |jv| # just don't know how to loop all connections
93
+ jv.commit
94
+ Embulk.logger.info { "embulk-output-vertica: COMMIT!" }
95
+ jv.close rescue nil
96
+ end
97
+ @connection_pool = nil
72
98
  Embulk.logger.info { "embulk-output-vertica: task_reports: #{task_reports.to_json}" }
73
99
 
74
100
  # insert select from the temp table
75
- connect(task) do |jv|
101
+ connection_pool.with do |jv|
76
102
  query(jv, %[INSERT INTO #{quoted_schema}.#{quoted_table} SELECT * FROM #{quoted_schema}.#{quoted_temp_table}])
77
103
  jv.commit
78
104
  end
79
105
  ensure
80
- connect(task) do |jv|
106
+ connection_pool.with do |jv|
81
107
  # clean up the temp table
108
+ Embulk.logger.debug { "embulk-output-vertica: select count #{query(jv, %[SELECT count(*) FROM #{quoted_schema}.#{quoted_temp_table}]).map {|row| row.to_h }.join("\n") rescue nil}" }
109
+ Embulk.logger.trace { "embulk-output-vertica: select limit 10\n#{query(jv, %[SELECT * FROM #{quoted_schema}.#{quoted_temp_table} LIMIT 10]).map {|row| row.to_h }.join("\n") rescue nil}" }
82
110
  query(jv, %[DROP TABLE IF EXISTS #{quoted_schema}.#{quoted_temp_table}])
83
- Embulk.logger.debug { "embulk-output-vertica: select result #{query(jv, %[SELECT * FROM #{quoted_schema}.#{quoted_table} LIMIT 10]).map {|row| row.to_h }.join("\n") rescue nil}" }
111
+ end
112
+
113
+ connection_pool.shutdown do |jv|
114
+ jv.close rescue nil
84
115
  end
85
116
  end
86
117
  # this is for the -o next_config option; add some parameters for the next execution if desired
@@ -91,39 +122,44 @@ module Embulk
91
122
  def initialize(task, schema, index)
92
123
  super
93
124
  @converters = ValueConverterFactory.create_converters(schema, task['default_timezone'], task['column_options'])
94
- Embulk.logger.debug { @converters.to_s }
95
- @jv = self.class.connect(task)
125
+ Embulk.logger.trace { @converters.to_s }
96
126
  @num_input_rows = 0
97
127
  @num_output_rows = 0
98
128
  @num_rejected_rows = 0
99
129
  end
100
130
 
131
+ def connection_pool
132
+ self.class.connection_pool
133
+ end
134
+
101
135
  def close
102
- @jv.close
136
+ # do not close connection_pool on each thread / page
103
137
  end
104
138
 
105
139
  def add(page)
106
- json = nil # for log
107
- begin
108
- num_output_rows, rejects = copy(@jv, copy_sql) do |stdin|
109
- page.each do |record|
110
- json = to_json(record)
111
- Embulk.logger.debug { "embulk-output-vertica: to_json #{json}" }
112
- stdin << json << "\n"
113
- @num_input_rows += 1
140
+ connection_pool.with do |jv| # block if no available connection left
141
+ json = nil # for log
142
+ begin
143
+ num_output_rows, rejects = copy(jv, copy_sql) do |stdin|
144
+ page.each do |record|
145
+ json = to_json(record)
146
+ Embulk.logger.debug { "embulk-output-vertica: to_json #{json}" }
147
+ stdin << json << "\n"
148
+ @num_input_rows += 1
149
+ end
114
150
  end
151
+ num_rejected_rows = rejects.size
152
+ @num_output_rows += num_output_rows
153
+ @num_rejected_rows += num_rejected_rows
154
+ rescue java.sql.SQLDataException => e
155
+ jv.rollback
156
+ if @task['reject_on_materialized_type_error'] and e.message =~ /Rejected by user-defined parser/
157
+ Embulk.logger.warn "embulk-output-vertica: ROLLBACK! some of column types and values types do not fit #{json}"
158
+ else
159
+ Embulk.logger.warn "embulk-output-vertica: ROLLBACK!"
160
+ end
161
+ raise e # die transaction
115
162
  end
116
- num_rejected_rows = rejects.size
117
- @num_output_rows += num_output_rows
118
- @num_rejected_rows += num_rejected_rows
119
- rescue java.sql.SQLDataException => e
120
- @jv.rollback
121
- if @task['reject_on_materialized_type_error'] and e.message =~ /Rejected by user-defined parser/
122
- Embulk.logger.warn "embulk-output-vertica: ROLLBACK! some of column types and values types do not fit #{json}"
123
- else
124
- Embulk.logger.warn "embulk-output-vertica: ROLLBACK!"
125
- end
126
- raise e # die transaction
127
163
  end
128
164
  end
129
165
 
@@ -133,9 +169,10 @@ module Embulk
133
169
  def abort
134
170
  end
135
171
 
172
+ # this is called after processing all pages in a thread
173
+ # we do commit on #transaction for all connection pools, not at here
136
174
  def commit
137
- @jv.commit
138
- Embulk.logger.debug { "embulk-output-vertica: COMMIT! #{@num_output_rows} rows" }
175
+ Embulk.logger.debug { "embulk-output-vertica: #{@num_output_rows} rows" }
139
176
  task_report = {
140
177
  "num_input_rows" => @num_input_rows,
141
178
  "num_output_rows" => @num_output_rows,
@@ -145,25 +182,6 @@ module Embulk
145
182
 
146
183
  private
147
184
 
148
- def self.connect(task)
149
- jv = ::Jvertica.connect({
150
- host: task['host'],
151
- port: task['port'],
152
- user: task['user'],
153
- password: task['password'],
154
- database: task['database'],
155
- })
156
-
157
- if block_given?
158
- begin
159
- yield jv
160
- ensure
161
- jv.close
162
- end
163
- end
164
- jv
165
- end
166
-
167
185
  # @param [Schema] schema embulk defined column types
168
186
  # @param [Hash] column_options user defined column types
169
187
  # @return [String] sql schema used to CREATE TABLE
@@ -197,7 +215,7 @@ module Embulk
197
215
  "WHERE table_schema = #{quoted_schema} AND table_name = #{quoted_table}"
198
216
 
199
217
  sql_schema = {}
200
- connect(task) do |jv|
218
+ connection_pool.with do |jv|
201
219
  result = query(jv, sql)
202
220
  sql_schema = result.map {|row| [row[0], row[1]] }
203
221
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-vertica
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - eiji.sekiya
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-11-20 00:00:00.000000000 Z
12
+ date: 2015-11-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: jvertica
@@ -39,6 +39,20 @@ dependencies:
39
39
  version: '0'
40
40
  prerelease: false
41
41
  type: :runtime
42
+ - !ruby/object:Gem::Dependency
43
+ name: connection_pool
44
+ version_requirements: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ prerelease: false
55
+ type: :runtime
42
56
  - !ruby/object:Gem::Dependency
43
57
  name: bundler
44
58
  version_requirements: !ruby/object:Gem::Requirement