embulk-output-vertica 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +6 -0
 - data/README.md +1 -2
 - data/embulk-output-vertica.gemspec +1 -2
 - data/lib/embulk/output/vertica/output_thread.rb +148 -0
 - data/lib/embulk/output/vertica.rb +40 -119
 - metadata +2 -15
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 14be9147838a9b3e7e7c7ace08b5d26b491538cf
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 1f53ce38472f8c015e38e2b502cb652dfcaeef6e
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: f265d51ec0ffc498cbeaf66f6b86622d2cce48f2d9d5cac846250930bec95376b3dde1069f5aff12fd3c89a16d50172c3faed476652fd1cae9ac9d7eb582a04c
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 9eebc8e2346c03d908c9ca2c735623a6890d2e6ae852c3083efef543694b905e6d9d3babd24e1c7a9ced34433e13ee3e42a1bd0e28babfab6ab070e8009e8ead
         
     | 
    
        data/CHANGELOG.md
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | 
         @@ -18,8 +18,7 @@ 
     | 
|
| 
       18 
18 
     | 
    
         
             
            - **table**:    table name (string, required)
         
     | 
| 
       19 
19 
     | 
    
         
             
            - **mode**:     "insert", or "replace". See below. (string, default: insert)
         
     | 
| 
       20 
20 
     | 
    
         
             
            - **copy_mode**: specifies how data is loaded into the database. See vertica documents for details. (`AUTO`, `DIRECT`, or `TRICKLE`. default: `AUTO`)
         
     | 
| 
       21 
     | 
    
         
            -
            - **pool**: number of  
     | 
| 
       22 
     | 
    
         
            -
            - **pool_timeout**: timeout to checkout a connection from connection pools (seconds, default: 600)
         
     | 
| 
      
 21 
     | 
    
         
            +
            - **pool**: number of output threads; this controls how many COPY statements are issued concurrently (integer, default: processor_count, that is, the number of threads in the input plugin)
         
     | 
| 
       23 
22 
     | 
    
         
             
            - **abort_on_error**: stops the COPY command if a row is rejected and rolls back the command. No data is loaded. (bool, default: false)
         
     | 
| 
       24 
23 
     | 
    
         
             
            - **reject_on_materialized_type_error**: uses `reject_on_materialized_type_error` option for fjsonparser(). This rejects rows if any of column types and value types do not fit, ex) double value into INT column fails. See vertica documents for details. (bool, default: false)
         
     | 
| 
       25 
24 
     | 
    
         
             
            - **default_timezone**: the default timezone for column_options (string, default is "UTC")
         
     | 
| 
         @@ -1,6 +1,6 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            Gem::Specification.new do |spec|
         
     | 
| 
       2 
2 
     | 
    
         
             
              spec.name          = "embulk-output-vertica"
         
     | 
| 
       3 
     | 
    
         
            -
              spec.version       = "0. 
     | 
| 
      
 3 
     | 
    
         
            +
              spec.version       = "0.5.0"
         
     | 
| 
       4 
4 
     | 
    
         
             
              spec.authors       = ["eiji.sekiya", "Naotoshi Seo"]
         
     | 
| 
       5 
5 
     | 
    
         
             
              spec.email         = ["eiji.sekiya.0326@gmail.com", "sonots@gmail.com"]
         
     | 
| 
       6 
6 
     | 
    
         
             
              spec.summary       = "Vertica output plugin for Embulk"
         
     | 
| 
         @@ -15,7 +15,6 @@ Gem::Specification.new do |spec| 
     | 
|
| 
       15 
15 
     | 
    
         | 
| 
       16 
16 
     | 
    
         
             
              spec.add_dependency "jvertica", "~> 0.2"
         
     | 
| 
       17 
17 
     | 
    
         
             
              spec.add_dependency "tzinfo"
         
     | 
| 
       18 
     | 
    
         
            -
              spec.add_dependency "connection_pool"
         
     | 
| 
       19 
18 
     | 
    
         
             
              spec.add_development_dependency "bundler", "~> 1.7"
         
     | 
| 
       20 
19 
     | 
    
         
             
              spec.add_development_dependency "rake", "~> 10.0"
         
     | 
| 
       21 
20 
     | 
    
         
             
            end
         
     | 
| 
         @@ -0,0 +1,148 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Embulk
         
     | 
| 
      
 2 
     | 
    
         
            +
              module Output
         
     | 
| 
      
 3 
     | 
    
         
            +
                class Vertica < OutputPlugin
         
     | 
| 
      
 4 
     | 
    
         
            +
                  # Fixed-size pool of OutputThread workers. Pages handed to #enqueue are
                  # distributed over the workers in round-robin order.
                  class OutputThreadPool
                    # task   - task hash; 'default_timezone' and 'column_options' are read
                    #          here, and the whole hash is handed to every OutputThread
                    # schema - schema given to the converter factory and to every worker
                    # size   - number of OutputThread workers to create
                    def initialize(task, schema, size)
                      @size = size
                      # One converter set is built and shared by all workers.
                      converters = ValueConverterFactory.create_converters(schema, task['default_timezone'], task['column_options'])
                      @output_threads = size.times.map { OutputThread.new(task, schema, converters) }
                      @current_index = 0
                      # Guards the round-robin cursor: #enqueue may be called from several
                      # producer threads at once.
                      @mutex = Mutex.new
                    end

                    # Hands +page+ to the next worker in round-robin order.
                    #
                    # Only the cursor update happens under the lock; the (possibly
                    # blocking) hand-off to the worker is done outside of it so one slow
                    # worker does not stall producers targeting other workers.
                    def enqueue(page)
                      worker = @mutex.synchronize do
                        picked = @output_threads[@current_index]
                        @current_index = (@current_index + 1) % @size
                        picked
                      end
                      worker.enqueue(page)
                    end

                    # Starts every worker; returns the collected results of OutputThread#start.
                    def start
                      @output_threads.map(&:start)
                    end

                    # Commits every worker in order and returns their task reports as an array.
                    def commit
                      @output_threads.map(&:commit)
                    end
                  end
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
                  # A single worker: one background thread that opens its own connection
                  # via Vertica.connect, streams queued pages as JSON lines into one COPY
                  # statement on the temp table, and commits when told to finish.
                  class OutputThread
                    # task       - task hash (connection settings and COPY options)
                    # schema     - object whose #names lists the column names, in record order
                    # converters - hash of column name => callable converting a raw value
                    def initialize(task, schema, converters)
                      @task = task
                      @schema = schema
                      # Capacity 1: a producer blocks in #enqueue until the worker has
                      # taken the previously queued page.
                      @queue = SizedQueue.new(1)
                      @converters = converters
                      @num_input_rows = 0
                      @num_output_rows = 0
                      @num_rejected_rows = 0
                    end

                    # Queues +page+ for the worker thread.
                    #
                    # If the worker terminated with an exception (Thread#status is nil),
                    # joining it re-raises that exception in the caller. If the worker is no
                    # longer alive for any other reason, the page is not queued.
                    #
                    # NOTE(review): if the worker dies while a producer is already blocked in
                    # @queue.push, nothing wakes the producer up -- confirm whether that can
                    # happen in practice.
                    def enqueue(page)
                      if @thread.status.nil? # thread died by an error
                        @thread.join # raise the same error raised inside thread
                      end
                      if @thread.alive?
                        Embulk.logger.trace { "embulk-output-vertica: enqueued" }
                        @queue.push(page)
                      end
                    end

                    # Thread body: runs one COPY ... FROM STDIN, feeding it every record of
                    # every queued page until the 'finish' marker is dequeued, then commits.
                    #
                    # On java.sql.SQLDataException the transaction is rolled back, a warning
                    # is logged and the exception is re-raised, which terminates the thread.
                    def run
                      Embulk.logger.debug { "embulk-output-vertica: thread started" }
                      Vertica.connect(@task) do |jv|
                        json = nil # last serialized record, kept for the rollback log message
                        begin
                          num_output_rows, rejects = copy(jv, copy_sql) do |stdin|
                            while (page = @queue.pop)
                              if page == 'finish'
                                Embulk.logger.debug { "embulk-output-vertica: thread finished" }
                                break
                              end
                              Embulk.logger.trace { "embulk-output-vertica: dequeued" }

                              page.each do |record|
                                json = to_json(record)
                                Embulk.logger.trace { "embulk-output-vertica: to_json #{json}" }
                                stdin << json << "\n"
                                @num_input_rows += 1
                              end
                            end
                          end
                          @num_output_rows += num_output_rows
                          @num_rejected_rows += rejects.size
                          jv.commit
                          Embulk.logger.info { "embulk-output-vertica: COMMIT!" }
                        rescue java.sql.SQLDataException => e
                          jv.rollback
                          if @task['reject_on_materialized_type_error'] && e.message =~ /Rejected by user-defined parser/
                            Embulk.logger.warn "embulk-output-vertica: ROLLBACK! some of column types and values types do not fit #{json}"
                          else
                            Embulk.logger.warn "embulk-output-vertica: ROLLBACK!"
                          end
                          raise e # die transaction
                        end
                      end
                    end

                    # Spawns the worker thread running #run and returns it.
                    def start
                      @thread = Thread.new(&method(:run))
                    end

                    # Tells the worker to finish (if it is still alive), waits for it, and
                    # returns this worker's task report. An exception raised inside #run is
                    # re-raised here by Thread#join.
                    def commit
                      @queue.push('finish') if @thread.alive?
                      Thread.pass
                      @thread.join # the same error with run would be raised at here

                      {
                        'num_input_rows' => @num_input_rows,
                        'num_output_rows' => @num_output_rows,
                        'num_rejected_rows' => @num_rejected_rows,
                      }
                    end

                    # Internal helpers. NOTE(review): the `private` marker is left disabled,
                    # as before, so the public surface of this class is unchanged.
                    # private

                    # Logs +sql+ and runs it through conn.copy, passing the block along.
                    # Returns whatever conn.copy returns (#run destructures it into
                    # [num_output_rows, rejects]).
                    def copy(conn, sql, &block)
                      Embulk.logger.debug "embulk-output-vertica: #{sql}"
                      conn.copy(sql, &block)
                    end

                    # Memoized COPY statement targeting the temp table.
                    def copy_sql
                      @copy_sql ||= "COPY #{quoted_schema}.#{quoted_temp_table} FROM STDIN#{fjsonparser}#{copy_mode}#{abort_on_error} NO COMMIT"
                    end

                    # Serializes one record as a JSON object keyed by column name, running
                    # each value through the converter registered for its column.
                    def to_json(record)
                      pairs = @schema.names.zip(record).map do |column_name, value|
                        [column_name, @converters[column_name].call(value)]
                      end
                      Hash[pairs].to_json
                    end

                    def quoted_schema
                      ::Jvertica.quote_identifier(@task['schema'])
                    end

                    def quoted_table
                      ::Jvertica.quote_identifier(@task['table'])
                    end

                    def quoted_temp_table
                      ::Jvertica.quote_identifier(@task['temp_table'])
                    end

                    # COPY load-method clause, with the leading space included.
                    def copy_mode
                      " #{@task['copy_mode']}"
                    end

                    # ' ABORT ON ERROR' when the option is set, otherwise an empty string.
                    def abort_on_error
                      @task['abort_on_error'] ? ' ABORT ON ERROR' : ''
                    end

                    # PARSER clause, with the leading space included.
                    def fjsonparser
                      " PARSER fjsonparser(#{reject_on_materialized_type_error})"
                    end

                    # Argument text for fjsonparser(); empty when the option is off.
                    def reject_on_materialized_type_error
                      @task['reject_on_materialized_type_error'] ? 'reject_on_materialized_type_error=true' : ''
                    end
                  end
         
     | 
| 
      
 146 
     | 
    
         
            +
                end
         
     | 
| 
      
 147 
     | 
    
         
            +
              end
         
     | 
| 
      
 148 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -1,6 +1,6 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require 'jvertica'
         
     | 
| 
       2 
     | 
    
         
            -
            require 'connection_pool'
         
     | 
| 
       3 
2 
     | 
    
         
             
            require_relative 'vertica/value_converter_factory'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require_relative 'vertica/output_thread'
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
       5 
5 
     | 
    
         
             
            module Embulk
         
     | 
| 
       6 
6 
     | 
    
         
             
              module Output
         
     | 
| 
         @@ -10,8 +10,8 @@ module Embulk 
     | 
|
| 
       10 
10 
     | 
    
         
             
                  class Error < StandardError; end
         
     | 
| 
       11 
11 
     | 
    
         
             
                  class NotSupportedType < Error; end
         
     | 
| 
       12 
12 
     | 
    
         | 
| 
       13 
     | 
    
         
            -
                  def self. 
     | 
| 
       14 
     | 
    
         
            -
                    @ 
     | 
| 
      
 13 
     | 
    
         
            +
                  def self.thread_pool
         
     | 
| 
      
 14 
     | 
    
         
            +
                    @thread_pool ||= @thread_pool_proc.call
         
     | 
| 
       15 
15 
     | 
    
         
             
                  end
         
     | 
| 
       16 
16 
     | 
    
         | 
| 
       17 
17 
     | 
    
         
             
                  def self.transaction(config, schema, processor_count, &control)
         
     | 
| 
         @@ -27,24 +27,14 @@ module Embulk 
     | 
|
| 
       27 
27 
     | 
    
         
             
                      'mode'             => config.param('mode',             :string,  :default => 'insert'),
         
     | 
| 
       28 
28 
     | 
    
         
             
                      'copy_mode'        => config.param('copy_mode',        :string,  :default => 'AUTO'),
         
     | 
| 
       29 
29 
     | 
    
         
             
                      'abort_on_error'   => config.param('abort_on_error',   :bool,    :default => false),
         
     | 
| 
       30 
     | 
    
         
            -
                      'default_timezone' => config.param('default_timezone', :string, 
     | 
| 
      
 30 
     | 
    
         
            +
                      'default_timezone' => config.param('default_timezone', :string, :default => 'UTC'),
         
     | 
| 
       31 
31 
     | 
    
         
             
                      'column_options'   => config.param('column_options',   :hash,    :default => {}),
         
     | 
| 
       32 
32 
     | 
    
         
             
                      'reject_on_materialized_type_error' => config.param('reject_on_materialized_type_error', :bool, :default => false),
         
     | 
| 
       33 
33 
     | 
    
         
             
                      'pool'             => config.param('pool',             :integer, :default => processor_count),
         
     | 
| 
       34 
     | 
    
         
            -
                      'pool_timeout'     => config.param('pool_timeout',     :integer, :default => 600),
         
     | 
| 
       35 
34 
     | 
    
         
             
                    }
         
     | 
| 
       36 
     | 
    
         
            -
                    task['user'] ||= task['username']
         
     | 
| 
       37 
35 
     | 
    
         | 
| 
       38 
     | 
    
         
            -
                    @ 
     | 
| 
       39 
     | 
    
         
            -
                       
     | 
| 
       40 
     | 
    
         
            -
                        ::Jvertica.connect({
         
     | 
| 
       41 
     | 
    
         
            -
                          host: task['host'],
         
     | 
| 
       42 
     | 
    
         
            -
                          port: task['port'],
         
     | 
| 
       43 
     | 
    
         
            -
                          user: task['user'],
         
     | 
| 
       44 
     | 
    
         
            -
                          password: task['password'],
         
     | 
| 
       45 
     | 
    
         
            -
                          database: task['database'],
         
     | 
| 
       46 
     | 
    
         
            -
                        })
         
     | 
| 
       47 
     | 
    
         
            -
                      end
         
     | 
| 
      
 36 
     | 
    
         
            +
                    @thread_pool_proc = Proc.new do
         
     | 
| 
      
 37 
     | 
    
         
            +
                      OutputThreadPool.new(task, schema, task['pool'])
         
     | 
| 
       48 
38 
     | 
    
         
             
                    end
         
     | 
| 
       49 
39 
     | 
    
         | 
| 
       50 
40 
     | 
    
         
             
                    task['user'] ||= task['username']
         
     | 
| 
         @@ -73,7 +63,7 @@ module Embulk 
     | 
|
| 
       73 
63 
     | 
    
         
             
                    sql_schema_table = self.sql_schema_from_embulk_schema(schema, task['column_options'])
         
     | 
| 
       74 
64 
     | 
    
         | 
| 
       75 
65 
     | 
    
         
             
                    # create the target table
         
     | 
| 
       76 
     | 
    
         
            -
                     
     | 
| 
      
 66 
     | 
    
         
            +
                    connect(task) do |jv|
         
     | 
| 
       77 
67 
     | 
    
         
             
                      query(jv, %[DROP TABLE IF EXISTS #{quoted_schema}.#{quoted_table}]) if task['mode'] == 'REPLACE'
         
     | 
| 
       78 
68 
     | 
    
         
             
                      query(jv, %[CREATE TABLE IF NOT EXISTS #{quoted_schema}.#{quoted_table} (#{sql_schema_table})])
         
     | 
| 
       79 
69 
     | 
    
         
             
                    end
         
     | 
| 
         @@ -81,7 +71,7 @@ module Embulk 
     | 
|
| 
       81 
71 
     | 
    
         
             
                    sql_schema_temp_table = self.sql_schema_from_table(task)
         
     | 
| 
       82 
72 
     | 
    
         | 
| 
       83 
73 
     | 
    
         
             
                    # create a temp table
         
     | 
| 
       84 
     | 
    
         
            -
                     
     | 
| 
      
 74 
     | 
    
         
            +
                    connect(task) do |jv|
         
     | 
| 
       85 
75 
     | 
    
         
             
                      query(jv, %[DROP TABLE IF EXISTS #{quoted_schema}.#{quoted_temp_table}])
         
     | 
| 
       86 
76 
     | 
    
         
             
                      query(jv, %[CREATE TABLE #{quoted_schema}.#{quoted_temp_table} (#{sql_schema_temp_table})])
         
     | 
| 
       87 
77 
     | 
    
         
             
                      # Create the internal Vertica projection beforehand; otherwise, parallel copies lock the table to create a projection and we sometimes get an S Lock error
         
     | 
| 
         @@ -96,30 +86,21 @@ module Embulk 
     | 
|
| 
       96 
86 
     | 
    
         | 
| 
       97 
87 
     | 
    
         
             
                    begin
         
     | 
| 
       98 
88 
     | 
    
         
             
                      # insert data into the temp table
         
     | 
| 
       99 
     | 
    
         
            -
                       
     | 
| 
       100 
     | 
    
         
            -
                       
     | 
| 
       101 
     | 
    
         
            -
             
     | 
| 
       102 
     | 
    
         
            -
                        Embulk.logger.info { "embulk-output-vertica: COMMIT!" }
         
     | 
| 
       103 
     | 
    
         
            -
                        jv.close rescue nil
         
     | 
| 
       104 
     | 
    
         
            -
                      end
         
     | 
| 
       105 
     | 
    
         
            -
                      @connection_pool = nil
         
     | 
| 
      
 89 
     | 
    
         
            +
                      thread_pool.start
         
     | 
| 
      
 90 
     | 
    
         
            +
                      yield(task)
         
     | 
| 
      
 91 
     | 
    
         
            +
                      task_reports = thread_pool.commit
         
     | 
| 
       106 
92 
     | 
    
         
             
                      Embulk.logger.info { "embulk-output-vertica: task_reports: #{task_reports.to_json}" }
         
     | 
| 
       107 
93 
     | 
    
         | 
| 
       108 
94 
     | 
    
         
             
                      # insert select from the temp table
         
     | 
| 
       109 
     | 
    
         
            -
                       
     | 
| 
      
 95 
     | 
    
         
            +
                      connect(task) do |jv|
         
     | 
| 
       110 
96 
     | 
    
         
             
                        query(jv, %[INSERT INTO #{quoted_schema}.#{quoted_table} SELECT * FROM #{quoted_schema}.#{quoted_temp_table}])
         
     | 
| 
       111 
97 
     | 
    
         
             
                        jv.commit
         
     | 
| 
       112 
98 
     | 
    
         
             
                      end
         
     | 
| 
       113 
99 
     | 
    
         
             
                    ensure
         
     | 
| 
       114 
     | 
    
         
            -
                       
     | 
| 
      
 100 
     | 
    
         
            +
                      connect(task) do |jv|
         
     | 
| 
       115 
101 
     | 
    
         
             
                        # clean up the temp table
         
     | 
| 
       116 
     | 
    
         
            -
                        Embulk.logger.debug { "embulk-output-vertica: select count #{query(jv, %[SELECT count(*) FROM #{quoted_schema}.#{quoted_temp_table}]).map {|row| row.to_h }.join("\n") rescue nil}" }
         
     | 
| 
       117 
     | 
    
         
            -
                        Embulk.logger.trace { "embulk-output-vertica: select limit 10\n#{query(jv, %[SELECT * FROM #{quoted_schema}.#{quoted_temp_table} LIMIT 10]).map {|row| row.to_h }.join("\n") rescue nil}" }
         
     | 
| 
       118 
102 
     | 
    
         
             
                        query(jv, %[DROP TABLE IF EXISTS #{quoted_schema}.#{quoted_temp_table}])
         
     | 
| 
       119 
     | 
    
         
            -
             
     | 
| 
       120 
     | 
    
         
            -
             
     | 
| 
       121 
     | 
    
         
            -
                      connection_pool.shutdown do |jv|
         
     | 
| 
       122 
     | 
    
         
            -
                        jv.close rescue nil
         
     | 
| 
      
 103 
     | 
    
         
            +
                        Embulk.logger.debug { "embulk-output-vertica: select result\n#{query(jv, %[SELECT * FROM #{quoted_schema}.#{quoted_table} LIMIT 10]).map {|row| row.to_h }.join("\n") rescue nil}" }
         
     | 
| 
       123 
104 
     | 
    
         
             
                      end
         
     | 
| 
       124 
105 
     | 
    
         
             
                    end
         
     | 
| 
       125 
106 
     | 
    
         
             
                    # this is for -o next_config option, add some parameters for next time execution if wanted
         
     | 
| 
         @@ -130,46 +111,15 @@ module Embulk 
     | 
|
| 
       130 
111 
     | 
    
         
             
                  # instance is created on each thread
         
     | 
| 
       131 
112 
     | 
    
         
             
                  def initialize(task, schema, index)
         
     | 
| 
       132 
113 
     | 
    
         
             
                    super
         
     | 
| 
       133 
     | 
    
         
            -
                    @converters = ValueConverterFactory.create_converters(schema, task['default_timezone'], task['column_options'])
         
     | 
| 
       134 
     | 
    
         
            -
                    Embulk.logger.trace { @converters.to_s }
         
     | 
| 
       135 
     | 
    
         
            -
                    @num_input_rows = 0
         
     | 
| 
       136 
     | 
    
         
            -
                    @num_output_rows = 0
         
     | 
| 
       137 
     | 
    
         
            -
                    @num_rejected_rows = 0
         
     | 
| 
       138 
     | 
    
         
            -
                  end
         
     | 
| 
       139 
     | 
    
         
            -
             
     | 
| 
       140 
     | 
    
         
            -
                  def connection_pool
         
     | 
| 
       141 
     | 
    
         
            -
                    self.class.connection_pool
         
     | 
| 
       142 
114 
     | 
    
         
             
                  end
         
     | 
| 
       143 
115 
     | 
    
         | 
| 
      
 116 
     | 
    
         
            +
                  # called for each page in each thread
         
     | 
| 
       144 
117 
     | 
    
         
             
                  def close
         
     | 
| 
       145 
     | 
    
         
            -
                    # do not close connection_pool on each thread / page
         
     | 
| 
       146 
118 
     | 
    
         
             
                  end
         
     | 
| 
       147 
119 
     | 
    
         | 
| 
      
 120 
     | 
    
         
            +
                  # called for each page in each thread
         
     | 
| 
       148 
121 
     | 
    
         
             
                  def add(page)
         
     | 
| 
       149 
     | 
    
         
            -
                     
     | 
| 
       150 
     | 
    
         
            -
                      json = nil # for log
         
     | 
| 
       151 
     | 
    
         
            -
                      begin
         
     | 
| 
       152 
     | 
    
         
            -
                        num_output_rows, rejects = copy(jv, copy_sql) do |stdin|
         
     | 
| 
       153 
     | 
    
         
            -
                          page.each do |record|
         
     | 
| 
       154 
     | 
    
         
            -
                            json = to_json(record)
         
     | 
| 
       155 
     | 
    
         
            -
                            Embulk.logger.debug { "embulk-output-vertica: to_json #{json}" }
         
     | 
| 
       156 
     | 
    
         
            -
                            stdin << json << "\n"
         
     | 
| 
       157 
     | 
    
         
            -
                            @num_input_rows += 1
         
     | 
| 
       158 
     | 
    
         
            -
                          end
         
     | 
| 
       159 
     | 
    
         
            -
                        end
         
     | 
| 
       160 
     | 
    
         
            -
                        num_rejected_rows = rejects.size
         
     | 
| 
       161 
     | 
    
         
            -
                        @num_output_rows += num_output_rows
         
     | 
| 
       162 
     | 
    
         
            -
                        @num_rejected_rows += num_rejected_rows
         
     | 
| 
       163 
     | 
    
         
            -
                      rescue java.sql.SQLDataException => e
         
     | 
| 
       164 
     | 
    
         
            -
                        jv.rollback
         
     | 
| 
       165 
     | 
    
         
            -
                        if @task['reject_on_materialized_type_error'] and e.message =~ /Rejected by user-defined parser/
         
     | 
| 
       166 
     | 
    
         
            -
                          Embulk.logger.warn "embulk-output-vertica: ROLLBACK! some of column types and values types do not fit #{json}"
         
     | 
| 
       167 
     | 
    
         
            -
                        else
         
     | 
| 
       168 
     | 
    
         
            -
                          Embulk.logger.warn "embulk-output-vertica: ROLLBACK!"
         
     | 
| 
       169 
     | 
    
         
            -
                        end
         
     | 
| 
       170 
     | 
    
         
            -
                        raise e # die transaction
         
     | 
| 
       171 
     | 
    
         
            -
                      end
         
     | 
| 
       172 
     | 
    
         
            -
                    end
         
     | 
| 
      
 122 
     | 
    
         
            +
                    self.class.thread_pool.enqueue(page)
         
     | 
| 
       173 
123 
     | 
    
         
             
                  end
         
     | 
| 
       174 
124 
     | 
    
         | 
| 
       175 
125 
     | 
    
         
             
                  def finish
         
     | 
| 
         @@ -178,19 +128,33 @@ module Embulk 
     | 
|
| 
       178 
128 
     | 
    
         
             
                  def abort
         
     | 
| 
       179 
129 
     | 
    
         
             
                  end
         
     | 
| 
       180 
130 
     | 
    
         | 
| 
       181 
     | 
    
         
            -
                  #  
     | 
| 
       182 
     | 
    
         
            -
                  # we do commit on #transaction for all  
     | 
| 
      
 131 
     | 
    
         
            +
                  # called after processing all pages in each thread
         
     | 
| 
      
 132 
     | 
    
         
            +
                  # we do commit on #transaction for all pools, not at here
         
     | 
| 
       183 
133 
     | 
    
         
             
                  def commit
         
     | 
| 
       184 
     | 
    
         
            -
                     
     | 
| 
       185 
     | 
    
         
            -
                    task_report = {
         
     | 
| 
       186 
     | 
    
         
            -
                      "num_input_rows" => @num_input_rows,
         
     | 
| 
       187 
     | 
    
         
            -
                      "num_output_rows" => @num_output_rows,
         
     | 
| 
       188 
     | 
    
         
            -
                      "num_rejected_rows" => @num_rejected_rows,
         
     | 
| 
       189 
     | 
    
         
            -
                    }
         
     | 
| 
      
 134 
     | 
    
         
            +
                    {}
         
     | 
| 
       190 
135 
     | 
    
         
             
                  end
         
     | 
| 
       191 
136 
     | 
    
         | 
| 
       192 
137 
     | 
    
         
             
                  private
         
     | 
| 
       193 
138 
     | 
    
         | 
| 
      
 139 
     | 
    
         
            +
                  def self.connect(task)
         
     | 
| 
      
 140 
     | 
    
         
            +
                    jv = ::Jvertica.connect({
         
     | 
| 
      
 141 
     | 
    
         
            +
                      host: task['host'],
         
     | 
| 
      
 142 
     | 
    
         
            +
                      port: task['port'],
         
     | 
| 
      
 143 
     | 
    
         
            +
                      user: task['user'],
         
     | 
| 
      
 144 
     | 
    
         
            +
                      password: task['password'],
         
     | 
| 
      
 145 
     | 
    
         
            +
                      database: task['database'],
         
     | 
| 
      
 146 
     | 
    
         
            +
                    })
         
     | 
| 
      
 147 
     | 
    
         
            +
             
     | 
| 
      
 148 
     | 
    
         
            +
                    if block_given?
         
     | 
| 
      
 149 
     | 
    
         
            +
                      begin
         
     | 
| 
      
 150 
     | 
    
         
            +
                        yield jv
         
     | 
| 
      
 151 
     | 
    
         
            +
                      ensure
         
     | 
| 
      
 152 
     | 
    
         
            +
                        jv.close
         
     | 
| 
      
 153 
     | 
    
         
            +
                      end
         
     | 
| 
      
 154 
     | 
    
         
            +
                    end
         
     | 
| 
      
 155 
     | 
    
         
            +
                    jv
         
     | 
| 
      
 156 
     | 
    
         
            +
                  end
         
     | 
| 
      
 157 
     | 
    
         
            +
             
     | 
| 
       194 
158 
     | 
    
         
             
                  # @param [Schema] schema embulk defined column types
         
     | 
| 
       195 
159 
     | 
    
         
             
                  # @param [Hash]   column_options user defined column types
         
     | 
| 
       196 
160 
     | 
    
         
             
                  # @return [String] sql schema used to CREATE TABLE
         
     | 
| 
         @@ -224,7 +188,7 @@ module Embulk 
     | 
|
| 
       224 
188 
     | 
    
         
             
                      "WHERE table_schema = #{quoted_schema} AND table_name = #{quoted_table}"
         
     | 
| 
       225 
189 
     | 
    
         | 
| 
       226 
190 
     | 
    
         
             
                    sql_schema = {}
         
     | 
| 
       227 
     | 
    
         
            -
                     
     | 
| 
      
 191 
     | 
    
         
            +
                    connect(task) do |jv|
         
     | 
| 
       228 
192 
     | 
    
         
             
                      result = query(jv, sql)
         
     | 
| 
       229 
193 
     | 
    
         
             
                      sql_schema = result.map {|row| [row[0], row[1]] }
         
     | 
| 
       230 
194 
     | 
    
         
             
                    end
         
     | 
| 
         @@ -239,49 +203,6 @@ module Embulk 
     | 
|
| 
       239 
203 
     | 
    
         
             
                  def query(conn, sql)
         
     | 
| 
       240 
204 
     | 
    
         
             
                    self.class.query(conn, sql)
         
     | 
| 
       241 
205 
     | 
    
         
             
                  end
         
     | 
| 
       242 
     | 
    
         
            -
             
     | 
| 
       243 
     | 
    
         
            -
                  def copy(conn, sql, &block)
         
     | 
| 
       244 
     | 
    
         
            -
                    Embulk.logger.debug "embulk-output-vertica: #{sql}"
         
     | 
| 
       245 
     | 
    
         
            -
                    results, rejects = conn.copy(sql, &block)
         
     | 
| 
       246 
     | 
    
         
            -
                  end
         
     | 
| 
       247 
     | 
    
         
            -
             
     | 
| 
       248 
     | 
    
         
            -
                  def copy_sql
         
     | 
| 
       249 
     | 
    
         
            -
                    @copy_sql ||= "COPY #{quoted_schema}.#{quoted_temp_table} FROM STDIN#{fjsonparser}#{copy_mode}#{abort_on_error} NO COMMIT"
         
     | 
| 
       250 
     | 
    
         
            -
                  end
         
     | 
| 
       251 
     | 
    
         
            -
             
     | 
| 
       252 
     | 
    
         
            -
                  def to_json(record)
         
     | 
| 
       253 
     | 
    
         
            -
                    Hash[*(schema.names.zip(record).map do |column_name, value|
         
     | 
| 
       254 
     | 
    
         
            -
                      [column_name, @converters[column_name].call(value)]
         
     | 
| 
       255 
     | 
    
         
            -
                    end.flatten!(1))].to_json
         
     | 
| 
       256 
     | 
    
         
            -
                  end
         
     | 
| 
       257 
     | 
    
         
            -
             
     | 
| 
       258 
     | 
    
         
            -
                  def quoted_schema
         
     | 
| 
       259 
     | 
    
         
            -
                    ::Jvertica.quote_identifier(@task['schema'])
         
     | 
| 
       260 
     | 
    
         
            -
                  end
         
     | 
| 
       261 
     | 
    
         
            -
             
     | 
| 
       262 
     | 
    
         
            -
                  def quoted_table
         
     | 
| 
       263 
     | 
    
         
            -
                    ::Jvertica.quote_identifier(@task['table'])
         
     | 
| 
       264 
     | 
    
         
            -
                  end
         
     | 
| 
       265 
     | 
    
         
            -
             
     | 
| 
       266 
     | 
    
         
            -
                  def quoted_temp_table
         
     | 
| 
       267 
     | 
    
         
            -
                    ::Jvertica.quote_identifier(@task['temp_table'])
         
     | 
| 
       268 
     | 
    
         
            -
                  end
         
     | 
| 
       269 
     | 
    
         
            -
             
     | 
| 
       270 
     | 
    
         
            -
                  def copy_mode
         
     | 
| 
       271 
     | 
    
         
            -
                    " #{@task['copy_mode']}"
         
     | 
| 
       272 
     | 
    
         
            -
                  end
         
     | 
| 
       273 
     | 
    
         
            -
             
     | 
| 
       274 
     | 
    
         
            -
                  def abort_on_error
         
     | 
| 
       275 
     | 
    
         
            -
                    @task['abort_on_error'] ? ' ABORT ON ERROR' : ''
         
     | 
| 
       276 
     | 
    
         
            -
                  end
         
     | 
| 
       277 
     | 
    
         
            -
             
     | 
| 
       278 
     | 
    
         
            -
                  def fjsonparser
         
     | 
| 
       279 
     | 
    
         
            -
                    " PARSER fjsonparser(#{reject_on_materialized_type_error})"
         
     | 
| 
       280 
     | 
    
         
            -
                  end
         
     | 
| 
       281 
     | 
    
         
            -
             
     | 
| 
       282 
     | 
    
         
            -
                  def reject_on_materialized_type_error
         
     | 
| 
       283 
     | 
    
         
            -
                    @task['reject_on_materialized_type_error'] ? 'reject_on_materialized_type_error=true' : ''
         
     | 
| 
       284 
     | 
    
         
            -
                  end
         
     | 
| 
       285 
206 
     | 
    
         
             
                end
         
     | 
| 
       286 
207 
     | 
    
         
             
              end
         
     | 
| 
       287 
208 
     | 
    
         
             
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: embulk-output-vertica
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.4.1
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.5.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - eiji.sekiya
         
     | 
| 
         @@ -39,20 +39,6 @@ dependencies: 
     | 
|
| 
       39 
39 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       40 
40 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       41 
41 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       42 
     | 
    
         
            -
            - !ruby/object:Gem::Dependency
         
     | 
| 
       43 
     | 
    
         
            -
              name: connection_pool
         
     | 
| 
       44 
     | 
    
         
            -
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
       45 
     | 
    
         
            -
                requirements:
         
     | 
| 
       46 
     | 
    
         
            -
                - - ">="
         
     | 
| 
       47 
     | 
    
         
            -
                  - !ruby/object:Gem::Version
         
     | 
| 
       48 
     | 
    
         
            -
                    version: '0'
         
     | 
| 
       49 
     | 
    
         
            -
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
       50 
     | 
    
         
            -
                requirements:
         
     | 
| 
       51 
     | 
    
         
            -
                - - ">="
         
     | 
| 
       52 
     | 
    
         
            -
                  - !ruby/object:Gem::Version
         
     | 
| 
       53 
     | 
    
         
            -
                    version: '0'
         
     | 
| 
       54 
     | 
    
         
            -
              prerelease: false
         
     | 
| 
       55 
     | 
    
         
            -
              type: :runtime
         
     | 
| 
       56 
42 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       57 
43 
     | 
    
         
             
              name: bundler
         
     | 
| 
       58 
44 
     | 
    
         
             
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
         @@ -99,6 +85,7 @@ files: 
     | 
|
| 
       99 
85 
     | 
    
         
             
            - example.csv
         
     | 
| 
       100 
86 
     | 
    
         
             
            - example.yml
         
     | 
| 
       101 
87 
     | 
    
         
             
            - lib/embulk/output/vertica.rb
         
     | 
| 
      
 88 
     | 
    
         
            +
            - lib/embulk/output/vertica/output_thread.rb
         
     | 
| 
       102 
89 
     | 
    
         
             
            - lib/embulk/output/vertica/value_converter_factory.rb
         
     | 
| 
       103 
90 
     | 
    
         
             
            homepage: https://github.com/eratostennis/embulk-output-vertica
         
     | 
| 
       104 
91 
     | 
    
         
             
            licenses:
         
     |