fluent-plugin-pgdist 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -6,8 +6,8 @@ source "http://rubygems.org"
  # Add dependencies to develop your gem here.
  # Include everything needed to run rake, tests, features, etc.

- gem "fluentd"
- gem "pg"
+ gem "fluentd", "~> 0.10.33"
+ gem "pg", "~> 0.15.1"

  group :development do
    gem "rspec", "~> 2.12.0"
data/README.md CHANGED
@@ -7,7 +7,7 @@ fluent-plugin-pgdist is a fluentd plugin for distribute insert into PostgreSQL.
      # gem install fluentd
      # gem install fluent-plugin-pgdist

- ## Usage
+ ## Usage: Insert into PostgreSQL table

  Define a match directive for pgdist in the fluentd config file (e.g. fluent.conf):

@@ -41,16 +41,125 @@ Input data:
  Check table data:

      $ echo 'select * from pgdist_test20130430' | psql -U postgres -d pgdist
-      id | created_at | value
+       id  |     created_at      |                               value
      -----+---------------------+--------------------------------------------------------------------
       100 | 2013-04-30 01:23:45 | {"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}
      (1 row)
      $ echo 'select * from pgdist_test20130501' | psql -U postgres -d pgdist
-      id | created_at | value
+       id  |     created_at      |                               value
      -----+---------------------+--------------------------------------------------------------------
-      100 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
+      101 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
      (1 row)

+ ## Usage: Insert into PostgreSQL with a unique constraint
+
+ Define a match directive for pgdist in the fluentd config file (e.g. fluent.conf):
+
+     <match pgdist.input>
+       type pgdist
+       host localhost
+       username postgres
+       password postgres
+       database pgdist
+       table_moniker {|record|t=record["created_at"];"pgdist_test"+t[0..3]+t[5..6]+t[8..9]}
+       insert_filter {|record|[record["id"],record["created_at"],record.to_json]}
+       columns id,created_at,value
+       values $1,$2,$3
+       raise_exception false
+       unique_column id
+     </match>
+
+ Run fluentd:
+
+     $ fluentd -c fluent.conf &
+
+ Create the output tables:
+
+     $ echo 'CREATE TABLE pgdist_test20130430(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+     $ echo 'CREATE TABLE pgdist_test20130501(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+
+ Input data:
+
+     $ echo '{"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}' | fluent-cat pgdist.input
+     $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+     $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+     $ echo '{"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}' | fluent-cat pgdist.input
+
+ Check table data:
+
+     $ echo 'select * from pgdist_test20130430' | psql -U postgres -d pgdist
+      seq | id  |     created_at      |                               value
+     -----+-----+---------------------+--------------------------------------------------------------------
+        1 | 100 | 2013-04-30 01:23:45 | {"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}
+     (1 row)
+     $ echo 'select * from pgdist_test20130501' | psql -U postgres -d pgdist
+      seq | id  |     created_at      |                               value
+     -----+-----+---------------------+--------------------------------------------------------------------
+        1 | 101 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
+        2 | 102 | 2013-05-01 01:23:46 | {"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}
+     (2 rows)
+
+ ## Usage: Insert into PostgreSQL and an LTSV file
+
+ Define a match directive for pgdist in the fluentd config file (e.g. fluent.conf):
+
+     <match pgdist.input>
+       type pgdist
+       host localhost
+       username postgres
+       password postgres
+       database pgdist
+       table_moniker {|record|t=record["created_at"];"pgdist_test"+t[0..3]+t[5..6]+t[8..9]}
+       insert_filter {|record|[record["id"],record["created_at"],record.to_json]}
+       columns id,created_at,value
+       values $1,$2,$3
+       raise_exception false
+       unique_column id
+       file_moniker {|table|"/tmp/"+table}
+       file_format ltsv
+       file_record_filter {|f,r|h=JSON.parse(r["value"]);[["seq","id","created_at","text"],[r["seq"],r["id"],r["created_at"],h["text"]]]}
+       sequence_moniker {|table|"/tmp/"+table+".seq"}
+       sequence_column seq
+     </match>
+
+ Run fluentd:
+
+     $ fluentd -c fluent.conf &
+
+ Create the output tables:
+
+     $ echo 'CREATE TABLE pgdist_test20130430(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+     $ echo 'CREATE TABLE pgdist_test20130501(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+
+ Input data:
+
+     $ echo '{"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}' | fluent-cat pgdist.input
+     $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+     $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+     $ echo '{"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}' | fluent-cat pgdist.input
+
+ Check table data:
+
+     $ echo 'select * from pgdist_test20130430' | psql -U postgres -d pgdist
+      seq | id  |     created_at      |                               value
+     -----+-----+---------------------+--------------------------------------------------------------------
+        1 | 100 | 2013-04-30 01:23:45 | {"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}
+     (1 row)
+     $ echo 'select * from pgdist_test20130501' | psql -U postgres -d pgdist
+      seq | id  |     created_at      |                               value
+     -----+-----+---------------------+--------------------------------------------------------------------
+        1 | 101 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
+        2 | 102 | 2013-05-01 01:23:46 | {"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}
+     (2 rows)
+
+ Check file data:
+
+     $ cat /tmp/pgdist_test20130430
+     seq:1	id:100	created_at:2013-04-30 01:23:45	text:message1
+     $ cat /tmp/pgdist_test20130501
+     seq:1	id:101	created_at:2013-05-01 01:23:45	text:message2
+     seq:2	id:102	created_at:2013-05-01 01:23:46	text:message3
+
  ## Parameter

  * host
@@ -73,6 +182,18 @@ Check table data:
73
182
  * Column values in insert SQL
74
183
  * raise_exception
75
184
  * Flag to enable/disable exception in insert
185
+ * unique_column
186
+ * Column name with unique constraint
187
+ * file_moniker
188
+ * Ruby script that returns the output file name of each table
189
+ * file_format
190
+ * Output file format. json/ltsv/msgpack/tsv format is available.
191
+ * file_record_filter
192
+ * Ruby script to convert record for json/ltsv/msgpack file. This filter receives hash in json/msgpack format, [fields, values] in ltsv format.
193
+ * sequnece_column
194
+ * Sequence column name in PostgreSQL table
195
+ * sequence_moniker
196
+ * Ruby script that returns the sequence file name of each table
76
197
 
77
198
  ## Contributing to fluent-plugin-pgdist
78
199
 
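The `table_moniker` and `insert_filter` strings in the examples above are Ruby block literals; the plugin source below evals them into lambdas at configure time. A minimal standalone sketch of that mechanism, with a made-up record for illustration:

    require "json"

    # Sketch only: the plugin builds callables via eval("lambda" + config_string).
    table_moniker = '{|record|t=record["created_at"];"pgdist_test"+t[0..3]+t[5..6]+t[8..9]}'
    insert_filter = '{|record|[record["id"],record["created_at"],record.to_json]}'
    table_moniker_lambda = eval("lambda#{table_moniker}")
    insert_filter_lambda = eval("lambda#{insert_filter}")

    record = {"id" => "100", "created_at" => "2013-04-30T01:23:45Z", "text" => "message1"}
    puts table_moniker_lambda.call(record)  # => pgdist_test20130430 (routes to a per-day table)
    p insert_filter_lambda.call(record)     # => ["100", "2013-04-30T01:23:45Z", record.to_json] for $1,$2,$3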
data/VERSION CHANGED
@@ -1 +1 @@
- 0.1.1
+ 0.2.0
@@ -5,11 +5,11 @@

  Gem::Specification.new do |s|
    s.name = "fluent-plugin-pgdist"
-   s.version = "0.1.1"
+   s.version = "0.2.0"

    s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
    s.authors = ["Kenji Hara"]
-   s.date = "2013-05-01"
+   s.date = "2013-05-02"
    s.description = "Fluentd plugin for distribute insert into PostgreSQL"
    s.email = "haracane@gmail.com"
    s.extra_rdoc_files = [
@@ -39,8 +39,8 @@ Gem::Specification.new do |s|
    s.specification_version = 3

    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
-     s.add_runtime_dependency(%q<fluentd>, [">= 0"])
-     s.add_runtime_dependency(%q<pg>, [">= 0"])
+     s.add_runtime_dependency(%q<fluentd>, ["~> 0.10.33"])
+     s.add_runtime_dependency(%q<pg>, ["~> 0.15.1"])
      s.add_development_dependency(%q<rspec>, ["~> 2.12.0"])
      s.add_development_dependency(%q<yard>, ["~> 0.8.3"])
      s.add_development_dependency(%q<redcarpet>, ["~> 2.2.2"])
@@ -52,8 +52,8 @@ Gem::Specification.new do |s|
      s.add_development_dependency(%q<ci_reporter>, ["~> 1.8.3"])
      s.add_development_dependency(%q<flog>, ["~> 3.2.1"])
    else
-     s.add_dependency(%q<fluentd>, [">= 0"])
-     s.add_dependency(%q<pg>, [">= 0"])
+     s.add_dependency(%q<fluentd>, ["~> 0.10.33"])
+     s.add_dependency(%q<pg>, ["~> 0.15.1"])
      s.add_dependency(%q<rspec>, ["~> 2.12.0"])
      s.add_dependency(%q<yard>, ["~> 0.8.3"])
      s.add_dependency(%q<redcarpet>, ["~> 2.2.2"])
@@ -66,8 +66,8 @@ Gem::Specification.new do |s|
      s.add_dependency(%q<flog>, ["~> 3.2.1"])
    end
  else
-   s.add_dependency(%q<fluentd>, [">= 0"])
-   s.add_dependency(%q<pg>, [">= 0"])
+   s.add_dependency(%q<fluentd>, ["~> 0.10.33"])
+   s.add_dependency(%q<pg>, ["~> 0.15.1"])
    s.add_dependency(%q<rspec>, ["~> 2.12.0"])
    s.add_dependency(%q<yard>, ["~> 0.8.3"])
    s.add_dependency(%q<redcarpet>, ["~> 2.2.2"])
@@ -13,60 +13,193 @@ class Fluent::PgdistOutput < Fluent::BufferedOutput
    config_param :table_moniker, :string, :default => nil
    config_param :insert_filter, :string, :default => nil
    config_param :raise_exception, :bool, :default => false
+   config_param :insert_columns, :string, :default => nil
    config_param :columns, :string, :default => nil
+   config_param :insert_values, :string, :default => nil
    config_param :values, :string, :default => nil
+   config_param :unique_column, :string, :default => nil

-   config_param :format, :string, :default => "raw" # or json
+   config_param :sequence_moniker, :string, :default => nil
+
+   config_param :file_moniker, :string, :default => nil
+   config_param :file_record_filter, :string, :default => nil
+   config_param :file_format, :string, :default => nil
+   config_param :file_write_limit, :integer, :default => 10000
+   config_param :sequence_column, :string, :default => "seq"

    attr_accessor :handler

-   def initialize
-     super
-     require 'pg'
+   def client
+     PG::Connection.new({
+       :host => @host, :port => @port,
+       :user => @username, :password => @password,
+       :dbname => @database
+     })
    end

-   # We don't currently support mysql's analogous json format
    def configure(conf)
      super

-     if @columns.nil?
-       raise Fluent::ConfigError, "columns MUST be specified, but missing"
+     @insert_columns ||= @columns
+     @insert_values ||= @values
+
+     if @insert_columns.nil?
+       raise Fluent::ConfigError, "columns MUST be specified, but missing"
      end

      @table_moniker_lambda = eval("lambda#{@table_moniker}")
      @insert_filter_lambda = eval("lambda#{@insert_filter}")
+     if @file_moniker
+       @file_moniker_lambda = eval("lambda#{@file_moniker}")
+       @sequence_moniker ||= '{|table|"/tmp/#{table}.seq"}'
+       case @file_format
+       when "json", "msgpack", "message_pack"
+         @file_record_filter ||= '{|record|record}'
+       when "ltsv"
+         @file_record_filter ||= '{|fields,record|[fields,record]}'
+       else
+       end
+     end
+     @file_record_filter_lambda = eval("lambda#{@file_record_filter}") if @file_record_filter
+     @sequence_moniker_lambda = eval("lambda#{@sequence_moniker}") if @sequence_moniker
      self
    end

-   def start
-     super
+   DB_ESCAPE_PATTERN = Regexp.new("[\\\\\\a\\b\\n\\r\\t]")
+
+   def db_escape(str)
+     return "\\N" if str.nil?
+     rest = str
+     ret = ''
+     while match_data = DB_ESCAPE_PATTERN.match(rest)
+       ret += match_data.pre_match
+       code = match_data[0]
+       rest = match_data.post_match
+       case code
+       when '\\'
+         ret += '\\\\'
+       when "\a"
+         ret += "\\a"
+       when "\b"
+         ret += "\\b"
+       when "\n"
+         ret += "\\n"
+       when "\r"
+         ret += "\\r"
+       when "\t"
+         ret += "\\t"
+       end
+     end
+     return ret + rest
    end

-   def shutdown
-     super
+   def delete_duplicative_records(records)
+     records.uniq!{|r|r[@unique_column]}
    end

-   def format(tag, time, record)
-     [tag, time, record].to_msgpack
+   def delete_existing_records(handler, table, records)
+     unique_values = records.map{|r|r[@unique_column]}
+     if unique_values != []
+       where_sql = "where " + 1.upto(unique_values.size).map{|i|"#{@unique_column} = \$#{i}"}.join(" or ")
+       handler.prepare("select_#{table}", "select #{@unique_column} from #{table} #{where_sql}")
+       result = handler.exec_prepared("select_#{table}", unique_values)
+       exist_values = result.column_values(0)
+       return if exist_values.size == 0
+       $log.info "delete #{exist_values.size} duplicative records for #{table}"
+       records.reject!{|r|exist_values.include?(r[@unique_column])}
+     end
    end

-   def client
-     pgconn = PG::Connection.new({
-       :host => @host, :port => @port,
-       :user => @username, :password => @password,
-       :dbname => @database
-     })
-     return pgconn
+   def file_path(table)
+     @file_moniker_lambda.call(table)
    end

-   def table_name(record)
-     @table_moniker_lambda.call(record)
+   def filter_for_file_record(*args)
+     result = @file_record_filter_lambda.call(*args)
+     return result
    end

    def filter_for_insert(record)
      @insert_filter_lambda.call(record)
    end

+   def sequence_path(table)
+     @sequence_moniker_lambda.call(table)
+   end
+
+   def format(tag, time, record)
+     [tag, time, record].to_msgpack
+   end
+
+   def initialize
+     super
+     require 'pg'
+   end
+
+   def insert_into_db(handler, table, records)
+     if @unique_column
+       delete_duplicative_records(records)
+       delete_existing_records(handler, table, records)
+     end
+
+     $log.info "insert #{records.size} records into #{table}"
+     sql = "INSERT INTO #{table}(#{@insert_columns}) VALUES(#{@insert_values})"
+     $log.info "execute sql #{sql.inspect}"
+     statement = "write_#{table}"
+     handler.prepare(statement, sql)
+     records.each do |record|
+       record = filter_for_insert(record)
+       $log.info "insert #{record.inspect}"
+       begin
+         handler.exec_prepared(statement, record)
+       rescue Exception => e
+         if @raise_exception
+           raise e
+         else
+           $log.info e.message
+         end
+       end
+     end
+   end
+
+   def read_last_sequence(filepath)
+     last_sequence = File.read(filepath).chomp
+     last_sequence = nil if /-?[0-9]+/ !~ last_sequence
+     return last_sequence
+   end
+
+   def read_last_sequence_from_file(handler, table, filepath)
+     last_line = `tail -n 1 #{filepath}`
+     case @file_format
+     when "json"
+       last_record = JSON.parse(last_line)
+       last_sequence = last_record[@sequence_column]
+     when "ltsv"
+       last_record = Hash[last_line.split(/\t/).map{|p|p.split(/:/, 2)}]
+       last_sequence = last_record[@sequence_column]
+     else
+       result = handler.exec("select * from #{table} limit 0")
+       fields = result.fields
+       sequence_index = fields.index(@sequence_column)
+       last_record = last_line.split(/\t/)
+       last_sequence = last_record[sequence_index]
+     end
+     last_sequence = nil if /-?[0-9]+/ !~ last_sequence
+     return last_sequence
+   end
+
+   def shutdown
+     super
+   end
+
+   def start
+     super
+   end
+
+   def table_name(record)
+     @table_moniker_lambda.call(record)
+   end
+
    def write(chunk)
      handler = self.client
      records_hash = {}
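When `unique_column` is set, `insert_into_db` above drops duplicates in two passes: `delete_duplicative_records` removes in-batch duplicates, then `delete_existing_records` removes records whose key is already in the table. A rough standalone illustration of those two passes (the `existing_ids` array stands in for the prepared SELECT the plugin actually runs):

    unique_column = "id"
    existing_ids = ["101"]  # stand-in for ids the prepared SELECT finds in the table

    records = [
      {"id" => "101", "text" => "message2"},
      {"id" => "101", "text" => "message2"},  # in-batch duplicate
      {"id" => "102", "text" => "message3"},
    ]

    records.uniq! { |r| r[unique_column] }                           # delete_duplicative_records
    records.reject! { |r| existing_ids.include?(r[unique_column]) }  # delete_existing_records
    p records  # => [{"id"=>"102", "text"=>"message3"}]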
@@ -78,25 +211,55 @@ class Fluent::PgdistOutput < Fluent::BufferedOutput
        end
      }
      records_hash.each_pair do |table, records|
-       $log.info "insert #{records.size} records into #{table}"
-       sql = "INSERT INTO #{table}(#{@columns}) VALUES(#{@values})"
-       $log.info "execute sql #{sql.inspect}"
-       statement = "write_#{table}"
-       handler.prepare(statement, sql)
-       records.each do |record|
-         record = filter_for_insert(record)
-         $log.info "insert #{record.inspect}"
-         begin
-           handler.exec_prepared(statement, record)
-         rescue Exception => e
-           if @raise_exception
-             raise e
-           else
-             $log.info e.message
-           end
-         end
-       end
+       insert_into_db(handler, table, records)
+       write_to_file(handler, table) if @file_moniker
      end
      handler.close
    end
+
+   def write_pg_result(output_stream, fields, pg_result)
+     case @file_format
+     when "json"
+       pg_result.each do |tuple|
+         tuple = filter_for_file_record(tuple)
+         output_stream.puts(tuple.to_json)
+       end
+     when "ltsv"
+       pg_result.each_row do |row|
+         fields, row = filter_for_file_record(fields, row)
+         output_stream.puts(fields.each_with_index.map{|f,i|"#{f}:#{db_escape(row[i])}"}.join("\t"))
+       end
+     when "msgpack", "message_pack"
+       pg_result.each do |tuple|
+         tuple = filter_for_file_record(tuple)
+         output_stream.write(tuple.to_msgpack)
+       end
+     else
+       pg_result.each_row do |row|
+         output_stream.puts(row.map{|v|db_escape(v)}.join("\t"))
+       end
+     end
+   end
+
+   def write_to_file(handler, table)
+     sequence_file_path = sequence_path(table)
+     last_sequence = read_last_sequence(sequence_file_path) if File.exists?(sequence_file_path)
+     file = nil
+     while true
+       where_sql = "where #{last_sequence} < #{@sequence_column}" if last_sequence
+       result = handler.exec("select * from #{table} #{where_sql} order by #{@sequence_column} limit #{@file_write_limit}")
+       result_size = result.ntuples
+       break if result_size == 0
+
+       fields ||= result.fields
+       file ||= File.open(file_path(table), "a")
+       write_pg_result(file, fields, result)
+       last_sequence = result[result_size-1][@sequence_column]
+       break if result_size < @file_write_limit
+     end
+     if file
+       file.close
+       File.write(sequence_file_path, last_sequence.to_s) if last_sequence
+     end
+   end
  end
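`write_to_file` implements a simple checkpointed export: the last exported sequence value lives in a side file, and each flush selects rows past that checkpoint in `file_write_limit`-sized batches, appends them to the output file, and advances the checkpoint. A condensed sketch of that loop, assuming `handler` is an open `PG::Connection` and using the raw/tsv output branch (function and paths below are illustrative, not plugin API):

    # Sketch of the loop in #write_to_file, using the default-style monikers.
    def export_new_rows(handler, table, seq_column, limit = 10_000)
      seq_file = "/tmp/#{table}.seq"   # default sequence_moniker result
      out_path = "/tmp/#{table}"       # example file_moniker result
      last_seq = File.read(seq_file).chomp if File.exist?(seq_file)
      out = nil
      loop do
        where = last_seq ? "where #{last_seq} < #{seq_column}" : ""
        result = handler.exec(
          "select * from #{table} #{where} order by #{seq_column} limit #{limit}")
        break if result.ntuples == 0
        out ||= File.open(out_path, "a")
        result.each_row { |row| out.puts(row.join("\t")) }  # raw/tsv branch, unescaped
        last_seq = result[result.ntuples - 1][seq_column]   # remember newest exported row
        break if result.ntuples < limit                     # no full batch left
      end
      return unless out
      out.close
      File.write(seq_file, last_seq.to_s) if last_seq       # advance the checkpoint
    end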
@@ -1,14 +1,42 @@
  require "spec_helper"

  describe Fluent::PgdistOutput do
+   def init_test_tables
+     sql = %[
+       CREATE SCHEMA pgdist;
+       DROP TABLE pgdist.pgdist_test20130430;
+       DROP TABLE pgdist.pgdist_test20130501;
+       CREATE TABLE pgdist.pgdist_test20130430(
+         seq serial NOT NULL,
+         id text unique,
+         created_at timestamp without time zone,
+         value text);
+       CREATE TABLE pgdist.pgdist_test20130501(
+         seq serial NOT NULL,
+         id text unique,
+         created_at timestamp without time zone,
+         value text);
+       INSERT INTO pgdist.pgdist_test20130501(id, created_at, value)
+         VALUES ('101', '2013-05-01T01:23:45Z', 'dummy');
+     ]
+     `echo "#{sql}" | psql -U postgres -d pgdist 2>/dev/null`
+     exist_record = [1, '101', '2013-05-01T01:23:45Z', 'dummy']
+     `rm /tmp/pgdist.pgdist_test20130430 2>/dev/null`
+     File.write("/tmp/pgdist.pgdist_test20130501", exist_record.join("\t") + "\n")
+     `rm /tmp/pgdist.pgdist_test20130430.seq 2>/dev/null`
+     File.write("/tmp/pgdist.pgdist_test20130501.seq", "1")
+   end
+
    before :all do
      Fluent::Test.setup
      @input_records = [
        {'id'=>'100','created_at'=>'2013-04-30T01:23:45Z','text'=>'message1'},
        {'id'=>'101','created_at'=>'2013-05-01T01:23:45Z','text'=>'message2'},
-       {'id'=>'101','created_at'=>'2013-05-01T01:23:45Z','text'=>'message2'}
+       {'id'=>'101','created_at'=>'2013-05-01T01:23:45Z','text'=>'message2'},
+       {'id'=>'102','created_at'=>'2013-05-01T01:23:46Z','text'=>'message3'}
      ]
-     conf = %[
+     tag = "pgdist.test"
+     @default_conf = %[
  host localhost
  username postgres
  password postgres
@@ -17,20 +45,52 @@ table_moniker {|record|t=record["created_at"];"pgdist.pgdist_test"+t[0..3]+t[5..
  insert_filter {|record|[record["id"],record["created_at"],record.to_json]}
  columns id,created_at,value
  values $1,$2,$3
+ file_moniker {|table|"/tmp/"+table}
  ]
-     tag = "pgdist.test"
-     @driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, tag).configure(conf)
+     @driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(@default_conf)
+     unique_conf = @default_conf + "unique_column id\n"
+     @unique_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(unique_conf)
+     @connection = PG::Connection.new({
+       :host => "localhost", :port => 5432,
+       :user => "postgres", :password => "postgres",
+       :dbname => "pgdist"
+     })
    end

-   describe "#table_name(record)" do
-     context "when record['created_at'] = '2013-04-30T01:23:45Z'" do
-       it "should return 'pgdist.pgdist_test20130430'" do
-         result = @driver.instance.table_name(@input_records[0])
-         result.should == "pgdist.pgdist_test20130430"
+   after :all do
+     @connection.close
+   end
+
+   describe "#db_escape(str)" do
+     context "when str = nil" do
+       it "should return \\N" do
+         @driver.instance.db_escape(nil).should == "\\N"
+       end
+     end
+
+     context "when str = #{"\\1\a2\b3\n4\r5\t6\\\a\b\n\r\t".inspect}" do
+       it "should return #{"\\\\1\\a2\\b3\\n4\\r5\\t6\\\\\\a\\b\\n\\r\\t".inspect}" do
+         input = "\\1\a2\b3\n4\r5\t6\\\a\b\n\r\t"
+         expected = "\\\\1\\a2\\b3\\n4\\r5\\t6\\\\\\a\\b\\n\\r\\t"
+         @driver.instance.db_escape(input).should == expected
        end
      end
    end

+   describe "#delete_existing_records(handler, table, record)" do
+     before :each do
+       init_test_tables
+     end
+
+     it "should delete existing records" do
+       handler = @unique_driver.instance.client
+       records = @input_records[1..3]
+       @unique_driver.instance.delete_existing_records(handler, "pgdist.pgdist_test20130501", records)
+       records.shift.should == {"id"=>"102", "created_at"=>"2013-05-01T01:23:46Z", "text"=>"message3"}
+       records.size.should == 0
+     end
+   end
+
    describe "#filter_for_insert(record)" do
      context "when record is valid Hash" do
        it "should output valid Array" do
@@ -55,64 +115,253 @@ values $1,$2,$3
      end
    end

-   describe "#write(chunk)" do
-     before :all do
-       @connection = PG::Connection.new({
-         :host => "localhost", :port => 5432,
-         :user => "postgres", :password => "postgres",
-         :dbname => "pgdist"
-       })
-       sql = <<-EOF
-         CREATE SCHEMA pgdist;
-         DROP TABLE pgdist.pgdist_test20130430;
-         DROP TABLE pgdist.pgdist_test20130501;
-         CREATE TABLE pgdist.pgdist_test20130430(
-           seq serial NOT NULL,
-           id text unique,
-           created_at timestamp without time zone,
-           value text);
-         CREATE TABLE pgdist.pgdist_test20130501(
-           seq serial NOT NULL,
-           id text unique,
-           created_at timestamp without time zone,
-           value text);
-       EOF
-       `echo "#{sql}" | psql -U postgres -d pgdist 2>/dev/null`
+   describe "#table_name(record)" do
+     context "when record['created_at'] = '2013-04-30T01:23:45Z'" do
+       it "should return 'pgdist.pgdist_test20130430'" do
+         result = @driver.instance.table_name(@input_records[0])
+         result.should == "pgdist.pgdist_test20130430"
+       end
      end
+   end

-     it "should insert into PostgreSQL" do
-       records = @input_records
+   describe "#write(chunk)" do
+     before :all do
        time = Time.utc(2013,4,30,1,23,45).to_i
-       records.each do |record|
+       @input_records.each do |record|
          @driver.emit(record, time)
+         @unique_driver.emit(record, time)
+       end
+     end
+
+     before :each do
+       init_test_tables
+     end
+
+     context "when unique_column is not set" do
+       it "should insert into PostgreSQL" do
+         @driver.run
+         result = @connection.exec("select * from pgdist.pgdist_test20130430")
+
+         result.ntuples.should == 1
+
+         record = result[0]
+         record["seq"].should == "1"
+         record["id"].should == "100"
+         record["created_at"].should == "2013-04-30 01:23:45"
+         json = record["value"]
+         hash = JSON.parse(json)
+         hash["id"].should == "100"
+         hash["created_at"].should == "2013-04-30T01:23:45Z"
+         hash["text"].should == "message1"
+
+         result = @connection.exec("select * from pgdist.pgdist_test20130501")
+
+         result.ntuples.should == 2
+
+         record = result[0]
+         record["seq"].should == "1"
+         record["id"].should == "101"
+         record["created_at"].should == "2013-05-01 01:23:45"
+         record["value"].should == "dummy"
+
+         record = result[1]
+         record["seq"].should == "4"
+         record["id"].should == "102"
+         record["created_at"].should == "2013-05-01 01:23:46"
+         json = record["value"]
+         hash = JSON.parse(json)
+         hash["id"].should == "102"
+         hash["created_at"].should == "2013-05-01T01:23:46Z"
+         hash["text"].should == "message3"
+       end
+
+       it "should append to file" do
+         @driver.run
+
+         result = File.read("/tmp/pgdist.pgdist_test20130430.seq").chomp
+         result.should == "1"
+
+         result = File.read("/tmp/pgdist.pgdist_test20130430").split(/\n/).map{|l|l.split(/\t/)}
+         result.shift.should == ["1", "100", "2013-04-30 01:23:45", "{\"id\":\"100\",\"created_at\":\"2013-04-30T01:23:45Z\",\"text\":\"message1\"}"]
+         result.size.should == 0
+
+         result = File.read("/tmp/pgdist.pgdist_test20130501.seq").chomp
+         result.should == "4"
+
+         result = File.read("/tmp/pgdist.pgdist_test20130501").split(/\n/).map{|l|l.split(/\t/)}
+         result.shift.should == ["1", "101", "2013-05-01T01:23:45Z", "dummy"]
+         result.shift.should == ["4", "102", "2013-05-01 01:23:46", "{\"id\":\"102\",\"created_at\":\"2013-05-01T01:23:46Z\",\"text\":\"message3\"}"]
+         result.size.should == 0
+       end
+     end
+
+     context "when unique_column is set" do
+       it "should insert into PostgreSQL" do
+         @unique_driver.run
+         result = @connection.exec("select * from pgdist.pgdist_test20130430")
+
+         result.ntuples.should == 1
+
+         record = result[0]
+         record["seq"].should == "1"
+         record["id"].should == "100"
+         record["created_at"].should == "2013-04-30 01:23:45"
+         json = record["value"]
+         hash = JSON.parse(json)
+         hash["id"].should == "100"
+         hash["created_at"].should == "2013-04-30T01:23:45Z"
+         hash["text"].should == "message1"
+
+         result = @connection.exec("select * from pgdist.pgdist_test20130501")
+
+         result.ntuples.should == 2
+
+         record = result[0]
+         record["seq"].should == "1"
+         record["id"].should == "101"
+         record["created_at"].should == "2013-05-01 01:23:45"
+         record["value"].should == "dummy"
+
+         record = result[1]
+         record["seq"].should == "2"
+         record["id"].should == "102"
+         record["created_at"].should == "2013-05-01 01:23:46"
+         json = record["value"]
+         hash = JSON.parse(json)
+         hash["id"].should == "102"
+         hash["created_at"].should == "2013-05-01T01:23:46Z"
+         hash["text"].should == "message3"
+       end
+
+       it "should append to file" do
+         @unique_driver.run
+
+         result = File.read("/tmp/pgdist.pgdist_test20130430.seq").chomp
+         result.should == "1"
+
+         result = File.read("/tmp/pgdist.pgdist_test20130430").split(/\n/).map{|l|l.split(/\t/)}
+         result.shift.should == ["1", "100", "2013-04-30 01:23:45", "{\"id\":\"100\",\"created_at\":\"2013-04-30T01:23:45Z\",\"text\":\"message1\"}"]
+         result.size.should == 0
+
+         result = File.read("/tmp/pgdist.pgdist_test20130501.seq").chomp
+         result.should == "2"
+
+         result = File.read("/tmp/pgdist.pgdist_test20130501").split(/\n/).map{|l|l.split(/\t/)}
+
+         result.shift.should == ["1", "101", "2013-05-01T01:23:45Z", "dummy"]
+         result.shift.should == ["2", "102", "2013-05-01 01:23:46", "{\"id\":\"102\",\"created_at\":\"2013-05-01T01:23:46Z\",\"text\":\"message3\"}"]
+         result.size.should == 0
        end
-       @driver.run
-       result = @connection.exec("select * from pgdist.pgdist_test20130430")
-
-       record = result[0]
-       record["seq"].should == "1"
-       record["id"].should == "100"
-       record["created_at"].should == "2013-04-30 01:23:45"
-       json = record["value"]
-       hash = JSON.parse(json)
-       hash["id"].should == "100"
-       hash["created_at"].should == "2013-04-30T01:23:45Z"
-       hash["text"].should == "message1"
-
-       result = @connection.exec("select * from pgdist.pgdist_test20130501")
-
-       record = result[0]
-       record["seq"].should == "1"
-       record["id"].should == "101"
-       record["created_at"].should == "2013-05-01 01:23:45"
-       json = record["value"]
-       hash = JSON.parse(json)
-       hash["id"].should == "101"
-       hash["created_at"].should == "2013-05-01T01:23:45Z"
-       hash["text"].should == "message2"
-
-       @connection.close
      end
    end
-   end

+   describe "#write_pg_result(output_stream, file_fields, pg_result)" do
+     before :all do
+       @pg_result = @connection.exec(%[
+         (select 1 as seq, '100' as id, E'test\\ndata1' as value)
+         union
+         (select 2 as seq, '101' as id, E'test\\ndata2' as value)
+       ])
+     end
+
+     after :all do
+     end
+
+     before :each do
+       pipe = IO.pipe
+       @output_reader = pipe[0]
+       @output_writer = pipe[1]
+     end
+
+     after :each do
+       @output_reader.close unless @output_reader.closed?
+       @output_writer.close unless @output_writer.closed?
+     end
+
+     context "when file_format = json" do
+       before :each do
+         json_conf = @default_conf + %[
+           file_format json
+           file_record_filter {|r|{"seq"=>r["seq"],"id"=>r["id"],"text"=>r["value"]}}
+         ]
+         @json_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(json_conf)
+       end
+
+       it "should output json data" do
+         @json_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+         @output_writer.close
+         result = @output_reader.gets.chomp
+         result = JSON.parse(result)
+         result.should == {"seq"=>"1", "id"=>"100", "text"=>"test\ndata1"}
+         result = @output_reader.gets.chomp
+         result = JSON.parse(result)
+         result.should == {"seq"=>"2", "id"=>"101", "text"=>"test\ndata2"}
+       end
+     end
+
+     context "when file_format = ltsv" do
+       before :each do
+         ltsv_conf = @default_conf + %[
+           file_format ltsv
+           file_record_filter {|f,r|[["seq","id","text"],[r[0],r[1],r[2]]]}
+         ]
+         @ltsv_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(ltsv_conf)
+       end
+
+       it "should output ltsv data" do
+         @ltsv_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+         @output_writer.close
+         result = @output_reader.gets.chomp
+         result = Hash[result.split(/\t/).map{|p|p.split(/:/, 2)}]
+         result.should == {"seq"=>"1", "id"=>"100", "text"=>"test\\ndata1"}
+         result = @output_reader.gets.chomp
+         result = Hash[result.split(/\t/).map{|p|p.split(/:/, 2)}]
+         result.should == {"seq"=>"2", "id"=>"101", "text"=>"test\\ndata2"}
+         @output_reader.gets.should be_nil
+       end
+     end
+
+     context "when file_format = msgpack" do
+       before :each do
+         msgpack_conf = @default_conf + %[
+           file_format msgpack
+           file_record_filter {|r|{"seq"=>r["seq"],"id"=>r["id"],"text"=>r["value"]}}
+         ]
+         @msgpack_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(msgpack_conf)
+       end
+
+       it "should output message_pack data" do
+         @msgpack_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+         @output_writer.close
+         unpacker = MessagePack::Unpacker.new(@output_reader)
+         result = []
+         begin
+           unpacker.each do |record|
+             result.push record
+           end
+         rescue EOFError => e
+         end
+         result.shift.should == {"seq"=>"1", "id"=>"100", "text"=>"test\ndata1"}
+         result.shift.should == {"seq"=>"2", "id"=>"101", "text"=>"test\ndata2"}
+       end
+     end
+
+     context "when file_format = raw" do
+       before :each do
+         tsv_conf = @default_conf + "file_format raw\n"
+         @tsv_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(tsv_conf)
+       end
+       it "should output tsv data" do
+         @tsv_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+         @output_writer.close
+         result = @output_reader.gets.chomp
+         result = result.split(/\t/)
+         result.should == ["1", "100", "test\\ndata1"]
+         result = @output_reader.gets.chomp
+         result = result.split(/\t/)
+         result.should == ["2", "101", "test\\ndata2"]
+         @output_reader.gets.should be_nil
+       end
+     end
+   end
+ end
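The `#db_escape` examples in the spec above pin down the escaping contract: `nil` becomes the PostgreSQL COPY null marker `\N`, and backslash plus the control characters `\a \b \n \r \t` become two-character escape sequences, so each LTSV/TSV record stays on one line. A compact `gsub`-based equivalent of that contract (a sketch, not the plugin's match-loop implementation):

    ESCAPES = {
      "\\" => "\\\\", "\a" => "\\a", "\b" => "\\b",
      "\n" => "\\n",  "\r" => "\\r", "\t" => "\\t",
    }

    def db_escape(str)
      return "\\N" if str.nil?
      str.gsub(/[\\\a\b\n\r\t]/) { |c| ESCAPES[c] }
    end

    p db_escape(nil)           # => "\\N"
    p db_escape("a\tb\nc\\d")  # => "a\\tb\\nc\\\\d"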
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-pgdist
  version: !ruby/object:Gem::Version
-   version: 0.1.1
+   version: 0.2.0
  prerelease:
  platform: ruby
  authors:
@@ -9,40 +9,40 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-05-01 00:00:00.000000000 Z
+ date: 2013-05-02 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: fluentd
    requirement: !ruby/object:Gem::Requirement
      none: false
      requirements:
-     - - ! '>='
+     - - ~>
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 0.10.33
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      none: false
      requirements:
-     - - ! '>='
+     - - ~>
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 0.10.33
  - !ruby/object:Gem::Dependency
    name: pg
    requirement: !ruby/object:Gem::Requirement
      none: false
      requirements:
-     - - ! '>='
+     - - ~>
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 0.15.1
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      none: false
      requirements:
-     - - ! '>='
+     - - ~>
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 0.15.1
  - !ruby/object:Gem::Dependency
    name: rspec
    requirement: !ruby/object:Gem::Requirement
@@ -237,7 +237,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
      version: '0'
    segments:
    - 0
-   hash: 4199809916756292803
+   hash: -910513567270313927
  required_rubygems_version: !ruby/object:Gem::Requirement
    none: false
    requirements: