embulk-output-vertica 0.5.5 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6d81c4848d4fad7d85e7faeff05cf759c49488d1
4
- data.tar.gz: 21937e37ae064520bd43baaa0af03f9d593509a9
3
+ metadata.gz: 4fb8733ef4ad44731139ff6f8cb63aa7dfb6aa1c
4
+ data.tar.gz: b3d80ae9599a03762ac7e65cc2bcfb033f51ecae
5
5
  SHA512:
6
- metadata.gz: ef5e6d9dee4d26a7ddd13d67abdc59135e06c72a40151741941828fc23dfa8fc3552791f89e25d8e05027cf15035028323e9cb1e6fd29339a7242e92ddd443ce
7
- data.tar.gz: c8068505624bc2f868dc3b402bc6a4af15af219c99696d5fbd91c13efa438adf1236fb1e27d12985febaa1d8c4f3362839f5a99ba6ce621c2b108bd8e26f2ea1
6
+ metadata.gz: c84bb2d342b70fbbfbbd5656e41e3942115ed25fa2e786bdf7c9d10bf4540cabd0370cfc5d339c107657f71bb8b4b5365eac390d7d104f9f2e905ba3411867e9
7
+ data.tar.gz: 92df6faa9d4b0b2be4e6085cf6b2f87986c9f6636ce622f4c901d7e691133d691f5f286578ed48a87a40bfffce49653477abeb26849d272f9e48dea4cebfb148
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.5.6 (2016/01/22)
2
+
3
+ Enhancements:
4
+
5
+ * Perform to_json in Embulk's threads (in OutputThreadPool#enqueue) instead of in the output threads. This achieves better performance when pool < num_threads.
6
+
1
7
  # 0.5.5 (2016/01/22)
2
8
 
3
9
  Changes:
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-output-vertica"
3
- spec.version = "0.5.5"
3
+ spec.version = "0.5.6"
4
4
  spec.authors = ["eiji.sekiya", "Naotoshi Seo"]
5
5
  spec.email = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
6
6
  spec.summary = "Vertica output plugin for Embulk"
@@ -6,14 +6,19 @@ module Embulk
6
6
  class OutputThreadPool
7
7
  def initialize(task, schema, size)
8
8
  @size = size
9
- converters = ValueConverterFactory.create_converters(schema, task['default_timezone'], task['column_options'])
10
- @output_threads = size.times.map { OutputThread.new(task, schema, converters) }
9
+ @schema = schema
10
+ @converters = ValueConverterFactory.create_converters(schema, task['default_timezone'], task['column_options'])
11
+ @output_threads = size.times.map { OutputThread.new(task) }
11
12
  @current_index = 0
12
13
  end
13
14
 
14
15
  def enqueue(page)
16
+ json_page = []
17
+ page.each do |record|
18
+ json_page << to_json(record)
19
+ end
15
20
  @mutex.synchronize do
16
- @output_threads[@current_index].enqueue(page)
21
+ @output_threads[@current_index].enqueue(json_page)
17
22
  @current_index = (@current_index + 1) % @size
18
23
  end
19
24
  end
@@ -26,14 +31,18 @@ module Embulk
26
31
  def commit
27
32
  task_reports = @size.times.map {|i| @output_threads[i].commit }
28
33
  end
34
+
35
+ def to_json(record)
36
+ Hash[*(@schema.names.zip(record).map do |column_name, value|
37
+ [column_name, @converters[column_name].call(value)]
38
+ end.flatten!(1))].to_json
39
+ end
29
40
  end
30
41
 
31
42
  class OutputThread
32
- def initialize(task, schema, converters)
43
+ def initialize(task)
33
44
  @task = task
34
- @schema = schema
35
45
  @queue = SizedQueue.new(1)
36
- @converters = converters
37
46
  @num_input_rows = 0
38
47
  @num_output_rows = 0
39
48
  @num_rejected_rows = 0
@@ -49,10 +58,10 @@ module Embulk
49
58
  end
50
59
  end
51
60
 
52
- def enqueue(page)
61
+ def enqueue(json_page)
53
62
  if @thread_active and @thread.alive?
54
63
  Embulk.logger.trace { "embulk-output-vertica: enqueue" }
55
- @queue.push(page)
64
+ @queue.push(json_page)
56
65
  else
57
66
  Embulk.logger.info { "embulk-output-vertica: thread is dead, but still trying to enqueue" }
58
67
  raise RuntimeError, "embulk-output-vertica: thread is died, but still trying to enqueue"
@@ -71,13 +80,11 @@ module Embulk
71
80
  io << buf
72
81
  end
73
82
 
74
- def write_buf(buf, page, &block)
75
- page.each do |record|
83
+ def write_buf(buf, json_page, &block)
84
+ json_page.each do |record|
76
85
  yield(record) if block_given?
77
86
  Embulk.logger.trace { "embulk-output-vertica: record #{record}" }
78
- json = to_json(record)
79
- Embulk.logger.trace { "embulk-output-vertica: to_json #{json}" }
80
- buf << json << "\n"
87
+ buf << record << "\n"
81
88
  @num_input_rows += 1
82
89
  end
83
90
  now = Time.now
@@ -90,18 +97,17 @@ module Embulk
90
97
  def run
91
98
  Embulk.logger.debug { "embulk-output-vertica: thread started" }
92
99
  Vertica.connect(@task) do |jv|
93
- json = nil # for log
94
100
  begin
95
101
  last_record = nil
96
102
  num_output_rows, rejects = copy(jv, copy_sql) do |stdin|
97
- while page = @queue.pop
98
- if page == 'finish'
103
+ while json_page = @queue.pop
104
+ if json_page == 'finish'
99
105
  Embulk.logger.trace { "embulk-output-vertica: popped finish" }
100
106
  break
101
107
  end
102
108
  Embulk.logger.trace { "embulk-output-vertica: dequeued" }
103
109
 
104
- @write_proc.call(stdin, page) do |record|
110
+ @write_proc.call(stdin, json_page) do |record|
105
111
  last_record = record
106
112
  end
107
113
  end
@@ -115,11 +121,11 @@ module Embulk
115
121
  Embulk.logger.debug { "embulk-output-vertica: COMMITTED!" }
116
122
  rescue java.sql.SQLDataException => e
117
123
  if @task['reject_on_materialized_type_error'] and e.message =~ /Rejected by user-defined parser/
118
- Embulk.logger.warn "embulk-output-vertica: ROLLBACK! some of column types and values types do not fit #{json}"
124
+ Embulk.logger.warn "embulk-output-vertica: ROLLBACK! some of column types and values types do not fit #{last_record}"
119
125
  else
120
126
  Embulk.logger.warn "embulk-output-vertica: ROLLBACK!"
121
127
  end
122
- Embulk.logger.info { "embulk-output-vertica: last_record: #{to_json(last_record)}" }
128
+ Embulk.logger.info { "embulk-output-vertica: last_record: #{last_record}" }
123
129
  jv.rollback
124
130
  raise e # die transaction
125
131
  rescue => e
@@ -170,12 +176,6 @@ module Embulk
170
176
  @copy_sql ||= "COPY #{quoted_schema}.#{quoted_temp_table} FROM STDIN#{compress}#{fjsonparser}#{copy_mode}#{abort_on_error} NO COMMIT"
171
177
  end
172
178
 
173
- def to_json(record)
174
- Hash[*(@schema.names.zip(record).map do |column_name, value|
175
- [column_name, @converters[column_name].call(value)]
176
- end.flatten!(1))].to_json
177
- end
178
-
179
179
  def quoted_schema
180
180
  ::Jvertica.quote_identifier(@task['schema'])
181
181
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-vertica
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.5
4
+ version: 0.5.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - eiji.sekiya