fluent-plugin-pgdist 0.1.1 → 0.2.0

data/Gemfile CHANGED
@@ -6,8 +6,8 @@ source "http://rubygems.org"
  # Add dependencies to develop your gem here.
  # Include everything needed to run rake, tests, features, etc.

- gem "fluentd"
- gem "pg"
+ gem "fluentd", "~> 0.10.33"
+ gem "pg", "~> 0.15.1"

  group :development do
  gem "rspec", "~> 2.12.0"
data/README.md CHANGED
@@ -7,7 +7,7 @@ fluent-plugin-pgdist is a fluentd plugin for distribute insert into PostgreSQL.
  # gem install fluentd
  # gem install fluent-plugin-pgdist

- ## Usage
+ ## Usage: Insert into PostgreSQL table

  Define a match directive for pgdist in the fluentd config file (e.g. fluent.conf):

@@ -41,16 +41,125 @@ Input data:
  Check table data:

  $ echo 'select * from pgdist_test20130430' | psql -U postgres -d pgdist
- id | created_at | value
+  id  |     created_at      |                               value
  -----+---------------------+--------------------------------------------------------------------
  100 | 2013-04-30 01:23:45 | {"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}
  (1 row)
  $ echo 'select * from pgdist_test20130501' | psql -U postgres -d pgdist
- id | created_at | value
+  id  |     created_at      |                               value
  -----+---------------------+--------------------------------------------------------------------
- 100 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
+ 101 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
  (1 row)

+ ## Usage: Insert into PostgreSQL with unique constraint
+
+ Define a match directive for pgdist in the fluentd config file (e.g. fluent.conf):
+
+ <match pgdist.input>
+ type pgdist
+ host localhost
+ username postgres
+ password postgres
+ database pgdist
+ table_moniker {|record|t=record["created_at"];"pgdist_test"+t[0..3]+t[5..6]+t[8..9]}
+ insert_filter {|record|[record["id"],record["created_at"],record.to_json]}
+ columns id,created_at,value
+ values $1,$2,$3
+ raise_exception false
+ unique_column id
+ </match>
+
+ Run fluentd:
+
+ $ fluentd -c fluent.conf &
+
+ Create output table:
+
+ $ echo 'CREATE TABLE pgdist_test20130430(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+ $ echo 'CREATE TABLE pgdist_test20130501(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+
+ Input data:
+
+ $ echo '{"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}' | fluent-cat pgdist.input
+ $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+ $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+ $ echo '{"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}' | fluent-cat pgdist.input
+
+ Check table data:
+
+ $ echo 'select * from pgdist_test20130430' | psql -U postgres -d pgdist
+ seq | id | created_at | value
+ -----+-----+---------------------+--------------------------------------------------------------------
+ 1 | 100 | 2013-04-30 01:23:45 | {"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}
+ (1 row)
+ $ echo 'select * from pgdist_test20130501' | psql -U postgres -d pgdist
+ seq | id | created_at | value
+ -----+-----+---------------------+--------------------------------------------------------------------
+ 1 | 101 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
+ 2 | 102 | 2013-05-01 01:23:46 | {"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}
+ (2 rows)
+
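
Internally, when unique_column is set the plugin first drops duplicates within the buffered chunk, then queries the target table for ids that already exist and rejects those records (see delete_existing_records in the plugin diff below). A sketch of the generated lookup, shown here for two ids:

    unique_values = ["101", "102"]
    where_sql = "where " + 1.upto(unique_values.size).map { |i| "id = $#{i}" }.join(" or ")
    where_sql  #=> "where id = $1 or id = $2"
    # executed as a prepared statement:
    #   select id from pgdist_test20130501 where id = $1 or id = $2
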
+ ## Usage: Insert into PostgreSQL and an LTSV file
+
+ Define a match directive for pgdist in the fluentd config file (e.g. fluent.conf):
+
+ <match pgdist.input>
+ type pgdist
+ host localhost
+ username postgres
+ password postgres
+ database pgdist
+ table_moniker {|record|t=record["created_at"];"pgdist_test"+t[0..3]+t[5..6]+t[8..9]}
+ insert_filter {|record|[record["id"],record["created_at"],record.to_json]}
+ columns id,created_at,value
+ values $1,$2,$3
+ raise_exception false
+ unique_column id
+ file_moniker {|table|"/tmp/"+table}
+ file_format ltsv
+ file_record_filter {|f,r|h=JSON.parse(r[3]);[["seq","id","created_at","text"],[r[0],r[1],r[2],h["text"]]]}
+ sequence_moniker {|table|"/tmp/"+table+".seq"}
+ sequence_column seq
+ </match>
+
+ Run fluentd:
+
+ $ fluentd -c fluent.conf &
+
+ Create output table:
+
+ $ echo 'CREATE TABLE pgdist_test20130430(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+ $ echo 'CREATE TABLE pgdist_test20130501(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+
+ Input data:
+
+ $ echo '{"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}' | fluent-cat pgdist.input
+ $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+ $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+ $ echo '{"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}' | fluent-cat pgdist.input
+
+ Check table data:
+
+ $ echo 'select * from pgdist_test20130430' | psql -U postgres -d pgdist
+ seq | id | created_at | value
+ -----+-----+---------------------+--------------------------------------------------------------------
+ 1 | 100 | 2013-04-30 01:23:45 | {"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}
+ (1 row)
+ $ echo 'select * from pgdist_test20130501' | psql -U postgres -d pgdist
+ seq | id | created_at | value
+ -----+-----+---------------------+--------------------------------------------------------------------
+ 1 | 101 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
+ 2 | 102 | 2013-05-01 01:23:46 | {"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}
+ (2 rows)
+
+ Check file data:
+
+ $ cat /tmp/pgdist_test20130430
+ seq:1 id:100 created_at:2013-04-30 01:23:45 text:message1
+ $ cat /tmp/pgdist_test20130501
+ seq:1 id:101 created_at:2013-05-01 01:23:45 text:message2
+ seq:2 id:102 created_at:2013-05-01 01:23:46 text:message3
+
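
LTSV lines are tab-separated field:value pairs, so a line from the output file above parses back into a hash with one split per pair, the same way the plugin's specs read it:

    line = "seq:1\tid:100\tcreated_at:2013-04-30 01:23:45\ttext:message1"
    record = Hash[line.split(/\t/).map { |pair| pair.split(/:/, 2) }]
    record["created_at"]  #=> "2013-04-30 01:23:45"
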
  ## Parameter

  * host
@@ -73,6 +182,18 @@ Check table data:
  * Column values in insert SQL
  * raise_exception
  * Flag to enable/disable exceptions during insert
+ * unique_column
+ * Column name with a unique constraint, used to skip duplicate records
+ * file_moniker
+ * Ruby script that returns the output file name for each table
+ * file_format
+ * Output file format. json, ltsv, msgpack, and tsv formats are available.
+ * file_record_filter
+ * Ruby script that converts each record for file output. It receives a record hash for the json/msgpack formats, and [fields, values] for the ltsv format.
+ * sequence_column
+ * Sequence column name in the PostgreSQL table
+ * sequence_moniker
+ * Ruby script that returns the sequence file name for each table
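
The moniker and filter parameters are Ruby block literals; the plugin turns each one into a lambda with eval (see configure in the plugin diff below). A minimal sketch of the mechanism, using the file_moniker from the examples above:

    moniker = '{|table|"/tmp/"+table}'
    file_moniker_lambda = eval("lambda#{moniker}")
    file_moniker_lambda.call("pgdist_test20130430")  #=> "/tmp/pgdist_test20130430"
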
 
  ## Contributing to fluent-plugin-pgdist
 
data/VERSION CHANGED
@@ -1 +1 @@
- 0.1.1
+ 0.2.0
data/fluent-plugin-pgdist.gemspec CHANGED
@@ -5,11 +5,11 @@

  Gem::Specification.new do |s|
  s.name = "fluent-plugin-pgdist"
- s.version = "0.1.1"
+ s.version = "0.2.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Kenji Hara"]
- s.date = "2013-05-01"
+ s.date = "2013-05-02"
  s.description = "Fluentd plugin for distribute insert into PostgreSQL"
  s.email = "haracane@gmail.com"
  s.extra_rdoc_files = [
@@ -39,8 +39,8 @@ Gem::Specification.new do |s|
  s.specification_version = 3

  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
- s.add_runtime_dependency(%q<fluentd>, [">= 0"])
- s.add_runtime_dependency(%q<pg>, [">= 0"])
+ s.add_runtime_dependency(%q<fluentd>, ["~> 0.10.33"])
+ s.add_runtime_dependency(%q<pg>, ["~> 0.15.1"])
  s.add_development_dependency(%q<rspec>, ["~> 2.12.0"])
  s.add_development_dependency(%q<yard>, ["~> 0.8.3"])
  s.add_development_dependency(%q<redcarpet>, ["~> 2.2.2"])
@@ -52,8 +52,8 @@ Gem::Specification.new do |s|
  s.add_development_dependency(%q<ci_reporter>, ["~> 1.8.3"])
  s.add_development_dependency(%q<flog>, ["~> 3.2.1"])
  else
- s.add_dependency(%q<fluentd>, [">= 0"])
- s.add_dependency(%q<pg>, [">= 0"])
+ s.add_dependency(%q<fluentd>, ["~> 0.10.33"])
+ s.add_dependency(%q<pg>, ["~> 0.15.1"])
  s.add_dependency(%q<rspec>, ["~> 2.12.0"])
  s.add_dependency(%q<yard>, ["~> 0.8.3"])
  s.add_dependency(%q<redcarpet>, ["~> 2.2.2"])
@@ -66,8 +66,8 @@ Gem::Specification.new do |s|
  s.add_dependency(%q<flog>, ["~> 3.2.1"])
  end
  else
- s.add_dependency(%q<fluentd>, [">= 0"])
- s.add_dependency(%q<pg>, [">= 0"])
+ s.add_dependency(%q<fluentd>, ["~> 0.10.33"])
+ s.add_dependency(%q<pg>, ["~> 0.15.1"])
  s.add_dependency(%q<rspec>, ["~> 2.12.0"])
  s.add_dependency(%q<yard>, ["~> 0.8.3"])
  s.add_dependency(%q<redcarpet>, ["~> 2.2.2"])
data/lib/fluent/plugin/out_pgdist.rb CHANGED
@@ -13,60 +13,193 @@ class Fluent::PgdistOutput < Fluent::BufferedOutput
  config_param :table_moniker, :string, :default => nil
  config_param :insert_filter, :string, :default => nil
  config_param :raise_exception, :bool, :default => false
+ config_param :insert_columns, :string, :default => nil
  config_param :columns, :string, :default => nil
+ config_param :insert_values, :string, :default => nil
  config_param :values, :string, :default => nil
+ config_param :unique_column, :string, :default => nil

- config_param :format, :string, :default => "raw" # or json
+ config_param :sequence_moniker, :string, :default => nil
+
+ config_param :file_moniker, :string, :default => nil
+ config_param :file_record_filter, :string, :default => nil
+ config_param :file_format, :string, :default => nil
+ config_param :file_write_limit, :integer, :default => 10000
+ config_param :sequence_column, :string, :default => "seq"

  attr_accessor :handler

- def initialize
- super
- require 'pg'
+ def client
+ PG::Connection.new({
+ :host => @host, :port => @port,
+ :user => @username, :password => @password,
+ :dbname => @database
+ })
  end

- # We don't currently support mysql's analogous json format
  def configure(conf)
  super

- if @columns.nil?
- raise Fluent::ConfigError, "columns MUST be specified, but missing"
+ @insert_columns ||= @columns
+ @insert_values ||= @values
+
+ if @insert_columns.nil?
+ raise Fluent::ConfigError, "columns MUST be specified, but missing"
  end

  @table_moniker_lambda = eval("lambda#{@table_moniker}")
  @insert_filter_lambda = eval("lambda#{@insert_filter}")
+ if @file_moniker
+ @file_moniker_lambda = eval("lambda#{@file_moniker}")
+ @sequence_moniker ||= '{|table|"/tmp/#{table}.seq"}'
+ case @file_format
+ when "json", "msgpack", "message_pack"
+ @file_record_filter ||= '{|record|record}'
+ when "ltsv"
+ @file_record_filter ||= '{|fields,record|[fields,record]}'
+ end
+ end
+ @file_record_filter_lambda = eval("lambda#{@file_record_filter}") if @file_record_filter
+ @sequence_moniker_lambda = eval("lambda#{@sequence_moniker}") if @sequence_moniker
  self
  end

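For example, when file_format is ltsv and no file_record_filter is given, configure falls back to the identity filter above, so field names and row values pass through unchanged:

    filter = eval("lambda" + '{|fields,record|[fields,record]}')
    filter.call(["seq", "id"], ["1", "100"])  #=> [["seq", "id"], ["1", "100"]]
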
- def start
- super
+ DB_ESCAPE_PATTERN = Regexp.new("[\\\\\\a\\b\\n\\r\\t]")
+
+ def db_escape(str)
+ return "\\N" if str.nil?
+ rest = str
+ ret = ''
+ while match_data = DB_ESCAPE_PATTERN.match(rest)
+ ret += match_data.pre_match
+ code = match_data[0]
+ rest = match_data.post_match
+ case code
+ when '\\'
+ ret += '\\\\'
+ when "\a"
+ ret += "\\a"
+ when "\b"
+ ret += "\\b"
+ when "\n"
+ ret += "\\n"
+ when "\r"
+ ret += "\\r"
+ when "\t"
+ ret += "\\t"
+ end
+ end
+ return ret + rest
  end

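A quick check of db_escape against the branches above (results shown as Ruby string literals; \N is the NULL marker of PostgreSQL's text COPY format):

    db_escape(nil)        #=> "\\N"
    db_escape("a\tb\nc")  #=> "a\\tb\\nc"   # tab and newline become two-character escapes
    db_escape("x\\y")     #=> "x\\\\y"      # a lone backslash is doubled
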
- def shutdown
- super
+ def delete_duplicative_records(records)
+ records.uniq!{|r|r[@unique_column]}
  end

- def format(tag, time, record)
- [tag, time, record].to_msgpack
+ def delete_existing_records(handler, table, records)
+ unique_values = records.map{|r|r[@unique_column]}
+ if unique_values != []
+ where_sql = "where " + 1.upto(unique_values.size).map{|i|"#{@unique_column} = \$#{i}"}.join(" or ")
+ handler.prepare("select_#{table}", "select #{@unique_column} from #{table} #{where_sql}")
+ result = handler.exec_prepared("select_#{table}", unique_values)
+ exist_values = result.column_values(0)
+ return if exist_values.size == 0
+ $log.info "delete #{exist_values.size} duplicative records for #{table}"
+ records.reject!{|r|exist_values.include?(r[@unique_column])}
+ end
  end

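delete_duplicative_records relies on Array#uniq! with a block (Ruby 1.9.2+), which keeps the first record per unique_column value:

    records = [{"id"=>"101"}, {"id"=>"101"}, {"id"=>"102"}]
    records.uniq! { |r| r["id"] }
    records  #=> [{"id"=>"101"}, {"id"=>"102"}]
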
- def client
- pgconn = PG::Connection.new({
- :host => @host, :port => @port,
- :user => @username, :password => @password,
- :dbname => @database
- })
- return pgconn
+ def file_path(table)
+ @file_moniker_lambda.call(table)
  end

- def table_name(record)
- @table_moniker_lambda.call(record)
+ def filter_for_file_record(*args)
+ result = @file_record_filter_lambda.call(*args)
+ return result
  end

  def filter_for_insert(record)
  @insert_filter_lambda.call(record)
  end

+ def sequence_path(table)
+ @sequence_moniker_lambda.call(table)
+ end
+
+ def format(tag, time, record)
+ [tag, time, record].to_msgpack
+ end
+
+ def initialize
+ super
+ require 'pg'
+ end
+
+ def insert_into_db(handler, table, records)
+ if @unique_column
+ delete_duplicative_records(records)
+ delete_existing_records(handler, table, records)
+ end
+
+ $log.info "insert #{records.size} records into #{table}"
+ sql = "INSERT INTO #{table}(#{@insert_columns}) VALUES(#{@insert_values})"
+ $log.info "execute sql #{sql.inspect}"
+ statement = "write_#{table}"
+ handler.prepare(statement, sql)
+ records.each do |record|
+ record = filter_for_insert(record)
+ $log.info "insert #{record.inspect}"
+ begin
+ handler.exec_prepared(statement, record)
+ rescue Exception=>e
+ if @raise_exception
+ raise e
+ else
+ $log.info e.message
+ end
+ end
+ end
+ end
+
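Concretely, with the README configuration (columns id,created_at,value and values $1,$2,$3), insert_into_db prepares and runs statements equivalent to the following, using the table and record from the README examples:

    handler.prepare("write_pgdist_test20130430",
      "INSERT INTO pgdist_test20130430(id,created_at,value) VALUES($1,$2,$3)")
    handler.exec_prepared("write_pgdist_test20130430",
      ["100", "2013-04-30T01:23:45Z",
       '{"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}'])
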
+ def read_last_sequence(filepath)
+ last_sequence = File.read(filepath).chomp
+ last_sequence = nil if /-?[0-9]+/ !~ last_sequence
+ return last_sequence
+ end
+
+ def read_last_sequence_from_file(handler, table, filepath)
+ last_line = `tail -n 1 #{filepath}`
+ case @file_format
+ when "json"
+ last_record = JSON.parse(last_line)
+ last_sequence = last_record[@sequence_column]
+ when "ltsv"
+ last_record = Hash[last_line.split(/\t/).map{|p|p.split(/:/, 2)}]
+ last_sequence = last_record[@sequence_column]
+ else
+ result = handler.exec("select * from #{table} limit 0")
+ fields = result.fields
+ sequence_index = fields.index(@sequence_column)
+ last_record = last_line.split(/\t/)
+ last_sequence = last_record[sequence_index]
+ end
+ last_sequence = nil if /-?[0-9]+/ !~ last_sequence
+ return last_sequence
+ end
+
+ def shutdown
+ super
+ end
+
+ def start
+ super
+ end
+
+ def table_name(record)
+ @table_moniker_lambda.call(record)
+ end
+
  def write(chunk)
  handler = self.client
  records_hash = {}
@@ -78,25 +211,55 @@ class Fluent::PgdistOutput < Fluent::BufferedOutput
  end
  }
  records_hash.each_pair do |table, records|
- $log.info "insert #{records.size} records into #{table}"
- sql = "INSERT INTO #{table}(#{@columns}) VALUES(#{@values})"
- $log.info "execute sql #{sql.inspect}"
- statement = "write_#{table}"
- handler.prepare(statement, sql)
- records.each do |record|
- record = filter_for_insert(record)
- $log.info "insert #{record.inspect}"
- begin
- handler.exec_prepared(statement, record)
- rescue Exception=>e
- if @raise_exception
- raise e
- else
- $log.info e.message
- end
- end
- end
+ insert_into_db(handler, table, records)
+ write_to_file(handler, table) if @file_moniker
  end
  handler.close
  end
+
+ def write_pg_result(output_stream, fields, pg_result)
+ case @file_format
+ when "json"
+ pg_result.each do |tuple|
+ tuple = filter_for_file_record(tuple)
+ output_stream.puts(tuple.to_json)
+ end
+ when "ltsv"
+ pg_result.each_row do |row|
+ fields, row = filter_for_file_record(fields, row)
+ output_stream.puts(fields.each_with_index.map{|f,i|"#{f}:#{db_escape(row[i])}"}.join("\t"))
+ end
+ when "msgpack", "message_pack"
+ pg_result.each do |tuple|
+ tuple = filter_for_file_record(tuple)
+ output_stream.write(tuple.to_msgpack)
+ end
+ else
+ pg_result.each_row do |row|
+ output_stream.puts(row.map{|v|db_escape(v)}.join("\t"))
+ end
+ end
+ end
+
+ def write_to_file(handler, table)
+ sequence_file_path = sequence_path(table)
+ last_sequence = read_last_sequence(sequence_file_path) if File.exists?(sequence_file_path)
+ file = nil
+ while true
+ where_sql = "where #{last_sequence} < #{@sequence_column}" if last_sequence
+ result = handler.exec("select * from #{table} #{where_sql} order by #{@sequence_column} limit #{@file_write_limit}")
+ result_size = result.ntuples
+ break if result_size == 0
+
+ fields ||= result.fields
+ file ||= File.open(file_path(table), "a")
+ write_pg_result(file, fields, result)
+ last_sequence = result[result_size-1][@sequence_column]
+ break if result_size < @file_write_limit
+ end
+ if file
+ file.close
+ File.write(sequence_file_path, last_sequence.to_s) if last_sequence
+ end
+ end
  end
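
To make write_to_file's resume behavior concrete: after the README's LTSV example run, the sequence file holds the last exported value of the sequence column, and the next run selects only newer rows (paths and values as in the README):

    File.read("/tmp/pgdist_test20130501.seq")  #=> "2"
    # the next write pages through:
    #   select * from pgdist_test20130501 where 2 < seq order by seq limit 10000
    # so only rows inserted after seq 2 are appended to /tmp/pgdist_test20130501
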
@@ -1,14 +1,42 @@
  require "spec_helper"

  describe Fluent::PgdistOutput do
+ def init_test_tables
+ sql = %[
+ CREATE SCHEMA pgdist;
+ DROP TABLE pgdist.pgdist_test20130430;
+ DROP TABLE pgdist.pgdist_test20130501;
+ CREATE TABLE pgdist.pgdist_test20130430(
+ seq serial NOT NULL,
+ id text unique,
+ created_at timestamp without time zone,
+ value text);
+ CREATE TABLE pgdist.pgdist_test20130501(
+ seq serial NOT NULL,
+ id text unique,
+ created_at timestamp without time zone,
+ value text);
+ INSERT INTO pgdist.pgdist_test20130501(id, created_at, value)
+ VALUES ('101', '2013-05-01T01:23:45Z', 'dummy');
+ ]
+ `echo "#{sql}" | psql -U postgres -d pgdist 2>/dev/null`
+ exist_record = [1, '101','2013-05-01T01:23:45Z','dummy']
+ `rm /tmp/pgdist.pgdist_test20130430 2>/dev/null`
+ File.write("/tmp/pgdist.pgdist_test20130501", exist_record.join("\t") + "\n")
+ `rm /tmp/pgdist.pgdist_test20130430.seq 2>/dev/null`
+ File.write("/tmp/pgdist.pgdist_test20130501.seq", "1")
+ end
+
  before :all do
  Fluent::Test.setup
  @input_records = [
  {'id'=>'100','created_at'=>'2013-04-30T01:23:45Z','text'=>'message1'},
  {'id'=>'101','created_at'=>'2013-05-01T01:23:45Z','text'=>'message2'},
- {'id'=>'101','created_at'=>'2013-05-01T01:23:45Z','text'=>'message2'}
+ {'id'=>'101','created_at'=>'2013-05-01T01:23:45Z','text'=>'message2'},
+ {'id'=>'102','created_at'=>'2013-05-01T01:23:46Z','text'=>'message3'}
  ]
- conf = %[
+ tag = "pgdist.test"
+ @default_conf = %[
  host localhost
  username postgres
  password postgres
@@ -17,20 +45,52 @@ table_moniker {|record|t=record["created_at"];"pgdist.pgdist_test"+t[0..3]+t[5..
  insert_filter {|record|[record["id"],record["created_at"],record.to_json]}
  columns id,created_at,value
  values $1,$2,$3
+ file_moniker {|table|"/tmp/"+table}
  ]
- tag = "pgdist.test"
- @driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, tag).configure(conf)
+ @driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(@default_conf)
+ unique_conf = @default_conf + "unique_column id\n"
+ @unique_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(unique_conf)
+ @connection = PG::Connection.new({
+ :host => "localhost", :port => 5432,
+ :user => "postgres", :password => "postgres",
+ :dbname => "pgdist"
+ })
  end

- describe "#table_name(record)" do
- context "when record['created_at'] = '2013-04-30T01:23:45Z'" do
- it "should return 'pgdist.pgdist_test20130430'" do
- result = @driver.instance.table_name(@input_records[0])
- result.should == "pgdist.pgdist_test20130430"
+ after :all do
+ @connection.close
+ end
+
+ describe "#db_escape(str)" do
+ context "when str = nil" do
+ it "should return \\N" do
+ @driver.instance.db_escape(nil).should == "\\N"
+ end
+ end
+
+ context "when str = #{"\\1\a2\b3\n4\r5\t6\\\a\b\n\r\t".inspect}" do
+ it "should return #{"\\\\1\\a2\\b3\\n4\\r5\\t6\\\\\\a\\b\\n\\r\\t".inspect}" do
+ input = "\\1\a2\b3\n4\r5\t6\\\a\b\n\r\t"
+ expected = "\\\\1\\a2\\b3\\n4\\r5\\t6\\\\\\a\\b\\n\\r\\t"
+ @driver.instance.db_escape(input).should == expected
  end
  end
  end

+ describe "#delete_existing_records(handler, table, records)" do
+ before :each do
+ init_test_tables
+ end
+
+ it "should delete existing records" do
+ handler = @unique_driver.instance.client
+ records = @input_records[1..3]
+ @unique_driver.instance.delete_existing_records(handler, "pgdist.pgdist_test20130501", records)
+ records.shift.should == {"id"=>"102", "created_at"=>"2013-05-01T01:23:46Z", "text"=>"message3"}
+ records.size.should == 0
+ end
+ end
+
  describe "#filter_for_insert(record)" do
  context "when record is valid Hash" do
  it "should output valid Array" do
@@ -55,64 +115,253 @@ values $1,$2,$3
  end
  end

- describe "#write(chunk)" do
- before :all do
- @connection = PG::Connection.new({
- :host => "localhost", :port => 5432,
- :user => "postgres", :password => "postgres",
- :dbname => "pgdist"
- })
- sql = <<-EOF
- CREATE SCHEMA pgdist;
- DROP TABLE pgdist.pgdist_test20130430;
- DROP TABLE pgdist.pgdist_test20130501;
- CREATE TABLE pgdist.pgdist_test20130430(
- seq serial NOT NULL,
- id text unique,
- created_at timestamp without time zone,
- value text);
- CREATE TABLE pgdist.pgdist_test20130501(
- seq serial NOT NULL,
- id text unique,
- created_at timestamp without time zone,
- value text);
- EOF
- `echo "#{sql}" | psql -U postgres -d pgdist 2>/dev/null`
+ describe "#table_name(record)" do
+ context "when record['created_at'] = '2013-04-30T01:23:45Z'" do
+ it "should return 'pgdist.pgdist_test20130430'" do
+ result = @driver.instance.table_name(@input_records[0])
+ result.should == "pgdist.pgdist_test20130430"
+ end
  end
+ end

- it "should insert into PostgreSQL" do
- records = @input_records
+ describe "#write(chunk)" do
+ before :all do
  time = Time.utc(2013,4,30,1,23,45).to_i
- records.each do |record|
+ @input_records.each do |record|
  @driver.emit(record, time)
+ @unique_driver.emit(record, time)
+ end
+ end
+
+ before :each do
+ init_test_tables
+ end
+
+ context "when unique_column is not set" do
+ it "should insert into PostgreSQL" do
+ @driver.run
+ result = @connection.exec("select * from pgdist.pgdist_test20130430")
+
+ result.ntuples.should == 1
+
+ record = result[0]
+ record["seq"].should == "1"
+ record["id"].should == "100"
+ record["created_at"].should == "2013-04-30 01:23:45"
+ json = record["value"]
+ hash = JSON.parse(json)
+ hash["id"].should == "100"
+ hash["created_at"].should == "2013-04-30T01:23:45Z"
+ hash["text"].should == "message1"
+
+ result = @connection.exec("select * from pgdist.pgdist_test20130501")
+
+ result.ntuples.should == 2
+
+ record = result[0]
+ record["seq"].should == "1"
+ record["id"].should == "101"
+ record["created_at"].should == "2013-05-01 01:23:45"
+ record["value"].should == "dummy"
+
+ record = result[1]
+ record["seq"].should == "4"
+ record["id"].should == "102"
+ record["created_at"].should == "2013-05-01 01:23:46"
+ json = record["value"]
+ hash = JSON.parse(json)
+ hash["id"].should == "102"
+ hash["created_at"].should == "2013-05-01T01:23:46Z"
+ hash["text"].should == "message3"
+ end
+
+ it "should append to file" do
+ @driver.run
+
+ result = File.read("/tmp/pgdist.pgdist_test20130430.seq").chomp
+ result.should == "1"
+
+ result = File.read("/tmp/pgdist.pgdist_test20130430").split(/\n/).map{|l|l.split(/\t/)}
+ result.shift.should == ["1", "100", "2013-04-30 01:23:45", "{\"id\":\"100\",\"created_at\":\"2013-04-30T01:23:45Z\",\"text\":\"message1\"}"]
+ result.size.should == 0
+
+ result = File.read("/tmp/pgdist.pgdist_test20130501.seq").chomp
+ result.should == "4"
+
+ result = File.read("/tmp/pgdist.pgdist_test20130501").split(/\n/).map{|l|l.split(/\t/)}
+ result.shift.should == ["1", "101", "2013-05-01T01:23:45Z", "dummy"]
+ result.shift.should == ["4", "102", "2013-05-01 01:23:46", "{\"id\":\"102\",\"created_at\":\"2013-05-01T01:23:46Z\",\"text\":\"message3\"}"]
+ result.size.should == 0
+ end
+ end
+
+ context "when unique_column is set" do
+ it "should insert into PostgreSQL" do
+ @unique_driver.run
+ result = @connection.exec("select * from pgdist.pgdist_test20130430")
+
+ result.ntuples.should == 1
+
+ record = result[0]
+ record["seq"].should == "1"
+ record["id"].should == "100"
+ record["created_at"].should == "2013-04-30 01:23:45"
+ json = record["value"]
+ hash = JSON.parse(json)
+ hash["id"].should == "100"
+ hash["created_at"].should == "2013-04-30T01:23:45Z"
+ hash["text"].should == "message1"
+
+ result = @connection.exec("select * from pgdist.pgdist_test20130501")
+
+ result.ntuples.should == 2
+
+ record = result[0]
+ record["seq"].should == "1"
+ record["id"].should == "101"
+ record["created_at"].should == "2013-05-01 01:23:45"
+ record["value"].should == "dummy"
+
+ record = result[1]
+ record["seq"].should == "2"
+ record["id"].should == "102"
+ record["created_at"].should == "2013-05-01 01:23:46"
+ json = record["value"]
+ hash = JSON.parse(json)
+ hash["id"].should == "102"
+ hash["created_at"].should == "2013-05-01T01:23:46Z"
+ hash["text"].should == "message3"
+ end
+
+ it "should append to file" do
+ @unique_driver.run
+
+ result = File.read("/tmp/pgdist.pgdist_test20130430.seq").chomp
+ result.should == "1"
+
+ result = File.read("/tmp/pgdist.pgdist_test20130430").split(/\n/).map{|l|l.split(/\t/)}
+ result.shift.should == ["1", "100", "2013-04-30 01:23:45", "{\"id\":\"100\",\"created_at\":\"2013-04-30T01:23:45Z\",\"text\":\"message1\"}"]
+ result.size.should == 0
+
+ result = File.read("/tmp/pgdist.pgdist_test20130501.seq").chomp
+ result.should == "2"
+
+ result = File.read("/tmp/pgdist.pgdist_test20130501").split(/\n/).map{|l|l.split(/\t/)}
+
+ result.shift.should == ["1", "101", "2013-05-01T01:23:45Z", "dummy"]
+ result.shift.should == ["2", "102", "2013-05-01 01:23:46", "{\"id\":\"102\",\"created_at\":\"2013-05-01T01:23:46Z\",\"text\":\"message3\"}"]
+ result.size.should == 0
  end
- @driver.run
- result = @connection.exec("select * from pgdist.pgdist_test20130430")
-
- record = result[0]
- record["seq"].should == "1"
- record["id"].should == "100"
- record["created_at"].should == "2013-04-30 01:23:45"
- json = record["value"]
- hash = JSON.parse(json)
- hash["id"].should == "100"
- hash["created_at"].should == "2013-04-30T01:23:45Z"
- hash["text"].should == "message1"
-
- result = @connection.exec("select * from pgdist.pgdist_test20130501")
-
- record = result[0]
- record["seq"].should == "1"
- record["id"].should == "101"
- record["created_at"].should == "2013-05-01 01:23:45"
- json = record["value"]
- hash = JSON.parse(json)
- hash["id"].should == "101"
- hash["created_at"].should == "2013-05-01T01:23:45Z"
- hash["text"].should == "message2"
-
- @connection.close
  end
  end
- end

+ describe "#write_pg_result(output_stream, fields, pg_result)" do
+ before :all do
+ @pg_result = @connection.exec(%[
+ (select 1 as seq, '100' as id, E'test\\ndata1' as value)
+ union
+ (select 2 as seq, '101' as id, E'test\\ndata2' as value)
+ ])
+ end
+
+ before :each do
+ pipe = IO.pipe
+ @output_reader = pipe[0]
+ @output_writer = pipe[1]
+ end
+
+ after :each do
+ @output_reader.close unless @output_reader.closed?
+ @output_writer.close unless @output_writer.closed?
+ end
+
+ context "when file_format = json" do
+ before :each do
+ json_conf = @default_conf + %[
+ file_format json
+ file_record_filter {|r|{"seq"=>r["seq"],"id"=>r["id"],"text"=>r["value"]}}
+ ]
+ @json_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(json_conf)
+ end
+
+ it "should output json data" do
+ @json_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+ @output_writer.close
+ result = @output_reader.gets.chomp
+ result = JSON.parse(result)
+ result.should == {"seq"=>"1", "id"=>"100", "text"=>"test\ndata1"}
+ result = @output_reader.gets.chomp
+ result = JSON.parse(result)
+ result.should == {"seq"=>"2", "id"=>"101", "text"=>"test\ndata2"}
+ end
+ end
+
+ context "when file_format = ltsv" do
+ before :each do
+ ltsv_conf = @default_conf + %[
+ file_format ltsv
+ file_record_filter {|f,r|[["seq","id","text"],[r[0],r[1],r[2]]]}
+ ]
+ @ltsv_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(ltsv_conf)
+ end
+
+ it "should output ltsv data" do
+ @ltsv_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+ @output_writer.close
+ result = @output_reader.gets.chomp
+ result = Hash[result.split(/\t/).map{|p|p.split(/:/, 2)}]
+ result.should == {"seq"=>"1", "id"=>"100", "text"=>"test\\ndata1"}
+ result = @output_reader.gets.chomp
+ result = Hash[result.split(/\t/).map{|p|p.split(/:/, 2)}]
+ result.should == {"seq"=>"2", "id"=>"101", "text"=>"test\\ndata2"}
+ @output_reader.gets.should be_nil
+ end
+ end
+
+ context "when file_format = msgpack" do
+ before :each do
+ msgpack_conf = @default_conf + %[
+ file_format msgpack
+ file_record_filter {|r|{"seq"=>r["seq"],"id"=>r["id"],"text"=>r["value"]}}
+ ]
+ @msgpack_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(msgpack_conf)
+ end
+
+ it "should output message_pack data" do
+ @msgpack_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+ @output_writer.close
+ unpacker = MessagePack::Unpacker.new(@output_reader)
+ result = []
+ begin
+ unpacker.each do |record|
+ result.push record
+ end
+ rescue EOFError => e
+ end
+ result.shift.should == {"seq"=>"1", "id"=>"100", "text"=>"test\ndata1"}
+ result.shift.should == {"seq"=>"2", "id"=>"101", "text"=>"test\ndata2"}
+ end
+ end
+
+ context "when file_format = raw" do
+ before :each do
+ tsv_conf = @default_conf + "file_format raw\n"
+ @tsv_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(tsv_conf)
+ end
+ it "should output tsv data" do
+ @tsv_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+ @output_writer.close
+ result = @output_reader.gets.chomp
+ result = result.split(/\t/)
+ result.should == ["1", "100", "test\\ndata1"]
+ result = @output_reader.gets.chomp
+ result = result.split(/\t/)
+ result.should == ["2", "101", "test\\ndata2"]
+ @output_reader.gets.should be_nil
+ end
+ end
+ end
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-pgdist
  version: !ruby/object:Gem::Version
- version: 0.1.1
+ version: 0.2.0
  prerelease:
  platform: ruby
  authors:
@@ -9,40 +9,40 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-05-01 00:00:00.000000000 Z
+ date: 2013-05-02 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: fluentd
  requirement: !ruby/object:Gem::Requirement
  none: false
  requirements:
- - - ! '>='
+ - - ~>
  - !ruby/object:Gem::Version
- version: '0'
+ version: 0.10.33
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  none: false
  requirements:
- - - ! '>='
+ - - ~>
  - !ruby/object:Gem::Version
- version: '0'
+ version: 0.10.33
  - !ruby/object:Gem::Dependency
  name: pg
  requirement: !ruby/object:Gem::Requirement
  none: false
  requirements:
- - - ! '>='
+ - - ~>
  - !ruby/object:Gem::Version
- version: '0'
+ version: 0.15.1
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  none: false
  requirements:
- - - ! '>='
+ - - ~>
  - !ruby/object:Gem::Version
- version: '0'
+ version: 0.15.1
  - !ruby/object:Gem::Dependency
  name: rspec
  requirement: !ruby/object:Gem::Requirement
@@ -237,7 +237,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
  version: '0'
  segments:
  - 0
- hash: 4199809916756292803
+ hash: -910513567270313927
  required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
  requirements: