embulk-output-vertica 0.5.5 → 0.5.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6d81c4848d4fad7d85e7faeff05cf759c49488d1
4
- data.tar.gz: 21937e37ae064520bd43baaa0af03f9d593509a9
3
+ metadata.gz: 4fb8733ef4ad44731139ff6f8cb63aa7dfb6aa1c
4
+ data.tar.gz: b3d80ae9599a03762ac7e65cc2bcfb033f51ecae
5
5
  SHA512:
6
- metadata.gz: ef5e6d9dee4d26a7ddd13d67abdc59135e06c72a40151741941828fc23dfa8fc3552791f89e25d8e05027cf15035028323e9cb1e6fd29339a7242e92ddd443ce
7
- data.tar.gz: c8068505624bc2f868dc3b402bc6a4af15af219c99696d5fbd91c13efa438adf1236fb1e27d12985febaa1d8c4f3362839f5a99ba6ce621c2b108bd8e26f2ea1
6
+ metadata.gz: c84bb2d342b70fbbfbbd5656e41e3942115ed25fa2e786bdf7c9d10bf4540cabd0370cfc5d339c107657f71bb8b4b5365eac390d7d104f9f2e905ba3411867e9
7
+ data.tar.gz: 92df6faa9d4b0b2be4e6085cf6b2f87986c9f6636ce622f4c901d7e691133d691f5f286578ed48a87a40bfffce49653477abeb26849d272f9e48dea4cebfb148
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.5.6 (2016/01/22)
2
+
3
+ Enhancements:
4
+
5
+ * Perform to_json in embulk threads rather than in the output threads. This achieves better performance when pool < num_threads.
6
+
1
7
  # 0.5.5 (2016/01/22)
2
8
 
3
9
  Changes:
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-vertica"
3
- spec.version = "0.5.5"
3
+ spec.version = "0.5.6"
4
4
  spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
5
5
  spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
6
6
  spec.summary = "Vertica output plugin for Embulk"
@@ -6,14 +6,19 @@ module Embulk
6
6
  class OutputThreadPool
7
7
  def initialize(task, schema, size)
8
8
  @size = size
9
- converters = ValueConverterFactory.create_converters(schema, task['default_timezone'], task['column_options'])
10
- @output_threads = size.times.map { OutputThread.new(task, schema, converters) }
9
+ @schema = schema
10
+ @converters = ValueConverterFactory.create_converters(schema, task['default_timezone'], task['column_options'])
11
+ @output_threads = size.times.map { OutputThread.new(task) }
11
12
  @current_index = 0
12
13
  end
13
14
 
14
15
  def enqueue(page)
16
+ json_page = []
17
+ page.each do |record|
18
+ json_page << to_json(record)
19
+ end
15
20
  @mutex.synchronize do
16
- @output_threads[@current_index].enqueue(page)
21
+ @output_threads[@current_index].enqueue(json_page)
17
22
  @current_index = (@current_index + 1) % @size
18
23
  end
19
24
  end
@@ -26,14 +31,18 @@ module Embulk
26
31
  def commit
27
32
  task_reports = @size.times.map {|i| @output_threads[i].commit }
28
33
  end
34
+
35
+ def to_json(record)
36
+ Hash[*(@schema.names.zip(record).map do |column_name, value|
37
+ [column_name, @converters[column_name].call(value)]
38
+ end.flatten!(1))].to_json
39
+ end
29
40
  end
30
41
 
31
42
  class OutputThread
32
- def initialize(task, schema, converters)
43
+ def initialize(task)
33
44
  @task = task
34
- @schema = schema
35
45
  @queue = SizedQueue.new(1)
36
- @converters = converters
37
46
  @num_input_rows = 0
38
47
  @num_output_rows = 0
39
48
  @num_rejected_rows = 0
@@ -49,10 +58,10 @@ module Embulk
49
58
  end
50
59
  end
51
60
 
52
- def enqueue(page)
61
+ def enqueue(json_page)
53
62
  if @thread_active and @thread.alive?
54
63
  Embulk.logger.trace { "embulk-output-vertica: enqueue" }
55
- @queue.push(page)
64
+ @queue.push(json_page)
56
65
  else
57
66
  Embulk.logger.info { "embulk-output-vertica: thread is dead, but still trying to enqueue" }
58
67
  raise RuntimeError, "embulk-output-vertica: thread is died, but still trying to enqueue"
@@ -71,13 +80,11 @@ module Embulk
71
80
  io << buf
72
81
  end
73
82
 
74
- def write_buf(buf, page, &block)
75
- page.each do |record|
83
+ def write_buf(buf, json_page, &block)
84
+ json_page.each do |record|
76
85
  yield(record) if block_given?
77
86
  Embulk.logger.trace { "embulk-output-vertica: record #{record}" }
78
- json = to_json(record)
79
- Embulk.logger.trace { "embulk-output-vertica: to_json #{json}" }
80
- buf << json << "\n"
87
+ buf << record << "\n"
81
88
  @num_input_rows += 1
82
89
  end
83
90
  now = Time.now
@@ -90,18 +97,17 @@ module Embulk
90
97
  def run
91
98
  Embulk.logger.debug { "embulk-output-vertica: thread started" }
92
99
  Vertica.connect(@task) do |jv|
93
- json = nil # for log
94
100
  begin
95
101
  last_record = nil
96
102
  num_output_rows, rejects = copy(jv, copy_sql) do |stdin|
97
- while page = @queue.pop
98
- if page == 'finish'
103
+ while json_page = @queue.pop
104
+ if json_page == 'finish'
99
105
  Embulk.logger.trace { "embulk-output-vertica: popped finish" }
100
106
  break
101
107
  end
102
108
  Embulk.logger.trace { "embulk-output-vertica: dequeued" }
103
109
 
104
- @write_proc.call(stdin, page) do |record|
110
+ @write_proc.call(stdin, json_page) do |record|
105
111
  last_record = record
106
112
  end
107
113
  end
@@ -115,11 +121,11 @@ module Embulk
115
121
  Embulk.logger.debug { "embulk-output-vertica: COMMITTED!" }
116
122
  rescue java.sql.SQLDataException => e
117
123
  if @task['reject_on_materialized_type_error'] and e.message =~ /Rejected by user-defined parser/
118
- Embulk.logger.warn "embulk-output-vertica: ROLLBACK! some of column types and values types do not fit #{json}"
124
+ Embulk.logger.warn "embulk-output-vertica: ROLLBACK! some of column types and values types do not fit #{last_record}"
119
125
  else
120
126
  Embulk.logger.warn "embulk-output-vertica: ROLLBACK!"
121
127
  end
122
- Embulk.logger.info { "embulk-output-vertica: last_record: #{to_json(last_record)}" }
128
+ Embulk.logger.info { "embulk-output-vertica: last_record: #{last_record}" }
123
129
  jv.rollback
124
130
  raise e # die transaction
125
131
  rescue => e
@@ -170,12 +176,6 @@ module Embulk
170
176
  @copy_sql ||= "COPY #{quoted_schema}.#{quoted_temp_table} FROM STDIN#{compress}#{fjsonparser}#{copy_mode}#{abort_on_error} NO COMMIT"
171
177
  end
172
178
 
173
- def to_json(record)
174
- Hash[*(@schema.names.zip(record).map do |column_name, value|
175
- [column_name, @converters[column_name].call(value)]
176
- end.flatten!(1))].to_json
177
- end
178
-
179
179
  def quoted_schema
180
180
  ::Jvertica.quote_identifier(@task['schema'])
181
181
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-vertica
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.5
4
+ version: 0.5.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - eiji.sekiya