fluent-plugin-pgdist 0.1.1 → 0.2.0

data/Gemfile CHANGED
@@ -6,8 +6,8 @@ source "http://rubygems.org"
  # Add dependencies to develop your gem here.
  # Include everything needed to run rake, tests, features, etc.

- gem "fluentd"
- gem "pg"
+ gem "fluentd", "~> 0.10.33"
+ gem "pg", "~> 0.15.1"

  group :development do
  gem "rspec", "~> 2.12.0"
data/README.md CHANGED
@@ -7,7 +7,7 @@ fluent-plugin-pgdist is a fluentd plugin for distribute insert into PostgreSQL.
  # gem install fluentd
  # gem install fluent-plugin-pgdist

- ## Usage
+ ## Usage: Insert into PostgreSQL table

  Define a match directive for pgdist in the fluentd config file (e.g. fluent.conf):

@@ -41,16 +41,125 @@ Input data:
  Check table data:

  $ echo 'select * from pgdist_test20130430' | psql -U postgres -d pgdist
- id | created_at | value
+  id  |     created_at      |                               value
  -----+---------------------+--------------------------------------------------------------------
  100 | 2013-04-30 01:23:45 | {"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}
  (1 row)
  $ echo 'select * from pgdist_test20130501' | psql -U postgres -d pgdist
- id | created_at | value
+  id  |     created_at      |                               value
  -----+---------------------+--------------------------------------------------------------------
- 100 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
+ 101 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
  (1 row)

+ ## Usage: Insert into PostgreSQL with unique constraint
+
+ Define a match directive for pgdist in the fluentd config file (e.g. fluent.conf):
+
+ <match pgdist.input>
+ type pgdist
+ host localhost
+ username postgres
+ password postgres
+ database pgdist
+ table_moniker {|record|t=record["created_at"];"pgdist_test"+t[0..3]+t[5..6]+t[8..9]}
+ insert_filter {|record|[record["id"],record["created_at"],record.to_json]}
+ columns id,created_at,value
+ values $1,$2,$3
+ raise_exception false
+ unique_column id
+ </match>
+
+ Run fluentd:
+
+ $ fluentd -c fluent.conf &
+
+ Create output table:
+
+ $ echo 'CREATE TABLE pgdist_test20130430(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+ $ echo 'CREATE TABLE pgdist_test20130501(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+
+ Input data:
+
+ $ echo '{"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}' | fluent-cat pgdist.input
+ $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+ $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+ $ echo '{"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}' | fluent-cat pgdist.input
+
+ Check table data:
+
+ $ echo 'select * from pgdist_test20130430' | psql -U postgres -d pgdist
+ seq | id | created_at | value
+ -----+-----+---------------------+--------------------------------------------------------------------
+ 1 | 100 | 2013-04-30 01:23:45 | {"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}
+ (1 row)
+ $ echo 'select * from pgdist_test20130501' | psql -U postgres -d pgdist
+ seq | id | created_at | value
+ -----+-----+---------------------+--------------------------------------------------------------------
+ 1 | 101 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
+ 2 | 102 | 2013-05-01 01:23:46 | {"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}
+ (2 rows)
+
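
Internally, when unique_column is set the plugin first drops duplicates within the buffered chunk, then queries the target table for ids that already exist and rejects those records (see delete_existing_records in the plugin diff below). A sketch of the generated lookup, shown here for two ids:

    unique_values = ["101", "102"]
    where_sql = "where " + 1.upto(unique_values.size).map { |i| "id = $#{i}" }.join(" or ")
    where_sql  #=> "where id = $1 or id = $2"
    # executed as a prepared statement:
    #   select id from pgdist_test20130501 where id = $1 or id = $2
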
+ ## Usage: Insert into PostgreSQL and an LTSV file
+
+ Define a match directive for pgdist in the fluentd config file (e.g. fluent.conf):
+
+ <match pgdist.input>
+ type pgdist
+ host localhost
+ username postgres
+ password postgres
+ database pgdist
+ table_moniker {|record|t=record["created_at"];"pgdist_test"+t[0..3]+t[5..6]+t[8..9]}
+ insert_filter {|record|[record["id"],record["created_at"],record.to_json]}
+ columns id,created_at,value
+ values $1,$2,$3
+ raise_exception false
+ unique_column id
+ file_moniker {|table|"/tmp/"+table}
+ file_format ltsv
+ file_record_filter {|f,r|h=JSON.parse(r[3]);[["seq","id","created_at","text"],[r[0],r[1],r[2],h["text"]]]}
+ sequence_moniker {|table|"/tmp/"+table+".seq"}
+ sequence_column seq
+ </match>
+
+ Run fluentd:
+
+ $ fluentd -c fluent.conf &
+
+ Create output table:
+
+ $ echo 'CREATE TABLE pgdist_test20130430(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+ $ echo 'CREATE TABLE pgdist_test20130501(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+
+ Input data:
+
+ $ echo '{"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}' | fluent-cat pgdist.input
+ $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+ $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+ $ echo '{"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}' | fluent-cat pgdist.input
+
+ Check table data:
+
+ $ echo 'select * from pgdist_test20130430' | psql -U postgres -d pgdist
+ seq | id | created_at | value
+ -----+-----+---------------------+--------------------------------------------------------------------
+ 1 | 100 | 2013-04-30 01:23:45 | {"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}
+ (1 row)
+ $ echo 'select * from pgdist_test20130501' | psql -U postgres -d pgdist
+ seq | id | created_at | value
+ -----+-----+---------------------+--------------------------------------------------------------------
+ 1 | 101 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
+ 2 | 102 | 2013-05-01 01:23:46 | {"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}
+ (2 rows)
+
+ Check file data:
+
+ $ cat /tmp/pgdist_test20130430
+ seq:1 id:100 created_at:2013-04-30 01:23:45 text:message1
+ $ cat /tmp/pgdist_test20130501
+ seq:1 id:101 created_at:2013-05-01 01:23:45 text:message2
+ seq:2 id:102 created_at:2013-05-01 01:23:46 text:message3
+
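
LTSV lines are tab-separated field:value pairs, so a line from the output file above parses back into a hash with one split per pair, the same way the plugin's specs read it:

    line = "seq:1\tid:100\tcreated_at:2013-04-30 01:23:45\ttext:message1"
    record = Hash[line.split(/\t/).map { |pair| pair.split(/:/, 2) }]
    record["created_at"]  #=> "2013-04-30 01:23:45"
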
  ## Parameter

  * host
@@ -73,6 +182,18 @@ Check table data:
  * Column values in insert SQL
  * raise_exception
  * Flag to enable/disable exceptions during insert
+ * unique_column
+ * Column name with a unique constraint, used to skip duplicate records
+ * file_moniker
+ * Ruby script that returns the output file name for each table
+ * file_format
+ * Output file format. json, ltsv, msgpack, and tsv formats are available.
+ * file_record_filter
+ * Ruby script that converts each record for file output. It receives a record hash for the json/msgpack formats, and [fields, values] for the ltsv format.
+ * sequence_column
+ * Sequence column name in the PostgreSQL table
+ * sequence_moniker
+ * Ruby script that returns the sequence file name for each table
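
The moniker and filter parameters are Ruby block literals; the plugin turns each one into a lambda with eval (see configure in the plugin diff below). A minimal sketch of the mechanism, using the file_moniker from the examples above:

    moniker = '{|table|"/tmp/"+table}'
    file_moniker_lambda = eval("lambda#{moniker}")
    file_moniker_lambda.call("pgdist_test20130430")  #=> "/tmp/pgdist_test20130430"
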
 
  ## Contributing to fluent-plugin-pgdist
 
data/VERSION CHANGED
@@ -1 +1 @@
- 0.1.1
+ 0.2.0
data/fluent-plugin-pgdist.gemspec CHANGED
@@ -5,11 +5,11 @@

  Gem::Specification.new do |s|
  s.name = "fluent-plugin-pgdist"
- s.version = "0.1.1"
+ s.version = "0.2.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Kenji Hara"]
- s.date = "2013-05-01"
+ s.date = "2013-05-02"
  s.description = "Fluentd plugin for distribute insert into PostgreSQL"
  s.email = "haracane@gmail.com"
  s.extra_rdoc_files = [
@@ -39,8 +39,8 @@ Gem::Specification.new do |s|
  s.specification_version = 3

  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
- s.add_runtime_dependency(%q<fluentd>, [">= 0"])
- s.add_runtime_dependency(%q<pg>, [">= 0"])
+ s.add_runtime_dependency(%q<fluentd>, ["~> 0.10.33"])
+ s.add_runtime_dependency(%q<pg>, ["~> 0.15.1"])
  s.add_development_dependency(%q<rspec>, ["~> 2.12.0"])
  s.add_development_dependency(%q<yard>, ["~> 0.8.3"])
  s.add_development_dependency(%q<redcarpet>, ["~> 2.2.2"])
@@ -52,8 +52,8 @@ Gem::Specification.new do |s|
  s.add_development_dependency(%q<ci_reporter>, ["~> 1.8.3"])
  s.add_development_dependency(%q<flog>, ["~> 3.2.1"])
  else
- s.add_dependency(%q<fluentd>, [">= 0"])
- s.add_dependency(%q<pg>, [">= 0"])
+ s.add_dependency(%q<fluentd>, ["~> 0.10.33"])
+ s.add_dependency(%q<pg>, ["~> 0.15.1"])
  s.add_dependency(%q<rspec>, ["~> 2.12.0"])
  s.add_dependency(%q<yard>, ["~> 0.8.3"])
  s.add_dependency(%q<redcarpet>, ["~> 2.2.2"])
@@ -66,8 +66,8 @@ Gem::Specification.new do |s|
  s.add_dependency(%q<flog>, ["~> 3.2.1"])
  end
  else
- s.add_dependency(%q<fluentd>, [">= 0"])
- s.add_dependency(%q<pg>, [">= 0"])
+ s.add_dependency(%q<fluentd>, ["~> 0.10.33"])
+ s.add_dependency(%q<pg>, ["~> 0.15.1"])
  s.add_dependency(%q<rspec>, ["~> 2.12.0"])
  s.add_dependency(%q<yard>, ["~> 0.8.3"])
  s.add_dependency(%q<redcarpet>, ["~> 2.2.2"])
data/lib/fluent/plugin/out_pgdist.rb CHANGED
@@ -13,60 +13,193 @@ class Fluent::PgdistOutput < Fluent::BufferedOutput
  config_param :table_moniker, :string, :default => nil
  config_param :insert_filter, :string, :default => nil
  config_param :raise_exception, :bool, :default => false
+ config_param :insert_columns, :string, :default => nil
  config_param :columns, :string, :default => nil
+ config_param :insert_values, :string, :default => nil
  config_param :values, :string, :default => nil
+ config_param :unique_column, :string, :default => nil

- config_param :format, :string, :default => "raw" # or json
+ config_param :sequence_moniker, :string, :default => nil
+
+ config_param :file_moniker, :string, :default => nil
+ config_param :file_record_filter, :string, :default => nil
+ config_param :file_format, :string, :default => nil
+ config_param :file_write_limit, :integer, :default => 10000
+ config_param :sequence_column, :string, :default => "seq"

  attr_accessor :handler

- def initialize
- super
- require 'pg'
+ def client
+ PG::Connection.new({
+ :host => @host, :port => @port,
+ :user => @username, :password => @password,
+ :dbname => @database
+ })
  end

- # We don't currently support mysql's analogous json format
  def configure(conf)
  super

- if @columns.nil?
- raise Fluent::ConfigError, "columns MUST be specified, but missing"
+ @insert_columns ||= @columns
+ @insert_values ||= @values
+
+ if @insert_columns.nil?
+ raise Fluent::ConfigError, "columns MUST be specified, but missing"
  end

  @table_moniker_lambda = eval("lambda#{@table_moniker}")
  @insert_filter_lambda = eval("lambda#{@insert_filter}")
+ if @file_moniker
+ @file_moniker_lambda = eval("lambda#{@file_moniker}")
+ @sequence_moniker ||= '{|table|"/tmp/#{table}.seq"}'
+ case @file_format
+ when "json", "msgpack", "message_pack"
+ @file_record_filter ||= '{|record|record}'
+ when "ltsv"
+ @file_record_filter ||= '{|fields,record|[fields,record]}'
+ end
+ end
+ @file_record_filter_lambda = eval("lambda#{@file_record_filter}") if @file_record_filter
+ @sequence_moniker_lambda = eval("lambda#{@sequence_moniker}") if @sequence_moniker
  self
  end

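For example, when file_format is ltsv and no file_record_filter is given, configure falls back to the identity filter above, so field names and row values pass through unchanged:

    filter = eval("lambda" + '{|fields,record|[fields,record]}')
    filter.call(["seq", "id"], ["1", "100"])  #=> [["seq", "id"], ["1", "100"]]
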
- def start
- super
+ DB_ESCAPE_PATTERN = Regexp.new("[\\\\\\a\\b\\n\\r\\t]")
+
+ def db_escape(str)
+ return "\\N" if str.nil?
+ rest = str
+ ret = ''
+ while match_data = DB_ESCAPE_PATTERN.match(rest)
+ ret += match_data.pre_match
+ code = match_data[0]
+ rest = match_data.post_match
+ case code
+ when '\\'
+ ret += '\\\\'
+ when "\a"
+ ret += "\\a"
+ when "\b"
+ ret += "\\b"
+ when "\n"
+ ret += "\\n"
+ when "\r"
+ ret += "\\r"
+ when "\t"
+ ret += "\\t"
+ end
+ end
+ return ret + rest
  end

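A quick check of db_escape against the branches above (results shown as Ruby string literals; \N is the NULL marker of PostgreSQL's text COPY format):

    db_escape(nil)        #=> "\\N"
    db_escape("a\tb\nc")  #=> "a\\tb\\nc"   # tab and newline become two-character escapes
    db_escape("x\\y")     #=> "x\\\\y"      # a lone backslash is doubled
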
- def shutdown
- super
+ def delete_duplicative_records(records)
+ records.uniq!{|r|r[@unique_column]}
  end

- def format(tag, time, record)
- [tag, time, record].to_msgpack
+ def delete_existing_records(handler, table, records)
+ unique_values = records.map{|r|r[@unique_column]}
+ if unique_values != []
+ where_sql = "where " + 1.upto(unique_values.size).map{|i|"#{@unique_column} = \$#{i}"}.join(" or ")
+ handler.prepare("select_#{table}", "select #{@unique_column} from #{table} #{where_sql}")
+ result = handler.exec_prepared("select_#{table}", unique_values)
+ exist_values = result.column_values(0)
+ return if exist_values.size == 0
+ $log.info "delete #{exist_values.size} duplicative records for #{table}"
+ records.reject!{|r|exist_values.include?(r[@unique_column])}
+ end
  end

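delete_duplicative_records relies on Array#uniq! with a block (Ruby 1.9.2+), which keeps the first record per unique_column value:

    records = [{"id"=>"101"}, {"id"=>"101"}, {"id"=>"102"}]
    records.uniq! { |r| r["id"] }
    records  #=> [{"id"=>"101"}, {"id"=>"102"}]
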
- def client
- pgconn = PG::Connection.new({
- :host => @host, :port => @port,
- :user => @username, :password => @password,
- :dbname => @database
- })
- return pgconn
+ def file_path(table)
+ @file_moniker_lambda.call(table)
  end

- def table_name(record)
- @table_moniker_lambda.call(record)
+ def filter_for_file_record(*args)
+ result = @file_record_filter_lambda.call(*args)
+ return result
  end

  def filter_for_insert(record)
  @insert_filter_lambda.call(record)
  end

+ def sequence_path(table)
+ @sequence_moniker_lambda.call(table)
+ end
+
+ def format(tag, time, record)
+ [tag, time, record].to_msgpack
+ end
+
+ def initialize
+ super
+ require 'pg'
+ end
+
+ def insert_into_db(handler, table, records)
+ if @unique_column
+ delete_duplicative_records(records)
+ delete_existing_records(handler, table, records)
+ end
+
+ $log.info "insert #{records.size} records into #{table}"
+ sql = "INSERT INTO #{table}(#{@insert_columns}) VALUES(#{@insert_values})"
+ $log.info "execute sql #{sql.inspect}"
+ statement = "write_#{table}"
+ handler.prepare(statement, sql)
+ records.each do |record|
+ record = filter_for_insert(record)
+ $log.info "insert #{record.inspect}"
+ begin
+ handler.exec_prepared(statement, record)
+ rescue Exception=>e
+ if @raise_exception
+ raise e
+ else
+ $log.info e.message
+ end
+ end
+ end
+ end
+
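Concretely, with the README configuration (columns id,created_at,value and values $1,$2,$3), insert_into_db prepares and runs statements equivalent to the following, using the table and record from the README examples:

    handler.prepare("write_pgdist_test20130430",
      "INSERT INTO pgdist_test20130430(id,created_at,value) VALUES($1,$2,$3)")
    handler.exec_prepared("write_pgdist_test20130430",
      ["100", "2013-04-30T01:23:45Z",
       '{"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}'])
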
+ def read_last_sequence(filepath)
+ last_sequence = File.read(filepath).chomp
+ last_sequence = nil if /-?[0-9]+/ !~ last_sequence
+ return last_sequence
+ end
+
+ def read_last_sequence_from_file(handler, table, filepath)
+ last_line = `tail -n 1 #{filepath}`
+ case @file_format
+ when "json"
+ last_record = JSON.parse(last_line)
+ last_sequence = last_record[@sequence_column]
+ when "ltsv"
+ last_record = Hash[last_line.split(/\t/).map{|p|p.split(/:/, 2)}]
+ last_sequence = last_record[@sequence_column]
+ else
+ result = handler.exec("select * from #{table} limit 0")
+ fields = result.fields
+ sequence_index = fields.index(@sequence_column)
+ last_record = last_line.split(/\t/)
+ last_sequence = last_record[sequence_index]
+ end
+ last_sequence = nil if /-?[0-9]+/ !~ last_sequence
+ return last_sequence
+ end
+
+ def shutdown
+ super
+ end
+
+ def start
+ super
+ end
+
+ def table_name(record)
+ @table_moniker_lambda.call(record)
+ end
+
  def write(chunk)
  handler = self.client
  records_hash = {}
@@ -78,25 +211,55 @@ class Fluent::PgdistOutput < Fluent::BufferedOutput
  end
  }
  records_hash.each_pair do |table, records|
- $log.info "insert #{records.size} records into #{table}"
- sql = "INSERT INTO #{table}(#{@columns}) VALUES(#{@values})"
- $log.info "execute sql #{sql.inspect}"
- statement = "write_#{table}"
- handler.prepare(statement, sql)
- records.each do |record|
- record = filter_for_insert(record)
- $log.info "insert #{record.inspect}"
- begin
- handler.exec_prepared(statement, record)
- rescue Exception=>e
- if @raise_exception
- raise e
- else
- $log.info e.message
- end
- end
- end
+ insert_into_db(handler, table, records)
+ write_to_file(handler, table) if @file_moniker
  end
  handler.close
  end
+
+ def write_pg_result(output_stream, fields, pg_result)
+ case @file_format
+ when "json"
+ pg_result.each do |tuple|
+ tuple = filter_for_file_record(tuple)
+ output_stream.puts(tuple.to_json)
+ end
+ when "ltsv"
+ pg_result.each_row do |row|
+ fields, row = filter_for_file_record(fields, row)
+ output_stream.puts(fields.each_with_index.map{|f,i|"#{f}:#{db_escape(row[i])}"}.join("\t"))
+ end
+ when "msgpack", "message_pack"
+ pg_result.each do |tuple|
+ tuple = filter_for_file_record(tuple)
+ output_stream.write(tuple.to_msgpack)
+ end
+ else
+ pg_result.each_row do |row|
+ output_stream.puts(row.map{|v|db_escape(v)}.join("\t"))
+ end
+ end
+ end
+
+ def write_to_file(handler, table)
+ sequence_file_path = sequence_path(table)
+ last_sequence = read_last_sequence(sequence_file_path) if File.exists?(sequence_file_path)
+ file = nil
+ while true
+ where_sql = "where #{last_sequence} < #{@sequence_column}" if last_sequence
+ result = handler.exec("select * from #{table} #{where_sql} order by #{@sequence_column} limit #{@file_write_limit}")
+ result_size = result.ntuples
+ break if result_size == 0
+
+ fields ||= result.fields
+ file ||= File.open(file_path(table), "a")
+ write_pg_result(file, fields, result)
+ last_sequence = result[result_size-1][@sequence_column]
+ break if result_size < @file_write_limit
+ end
+ if file
+ file.close
+ File.write(sequence_file_path, last_sequence.to_s) if last_sequence
+ end
+ end
  end
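
To make write_to_file's resume behavior concrete: after the README's LTSV example run, the sequence file holds the last exported value of the sequence column, and the next run selects only newer rows (paths and values as in the README):

    File.read("/tmp/pgdist_test20130501.seq")  #=> "2"
    # the next write pages through:
    #   select * from pgdist_test20130501 where 2 < seq order by seq limit 10000
    # so only rows inserted after seq 2 are appended to /tmp/pgdist_test20130501
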
@@ -1,14 +1,42 @@
  require "spec_helper"

  describe Fluent::PgdistOutput do
+ def init_test_tables
+ sql = %[
+ CREATE SCHEMA pgdist;
+ DROP TABLE pgdist.pgdist_test20130430;
+ DROP TABLE pgdist.pgdist_test20130501;
+ CREATE TABLE pgdist.pgdist_test20130430(
+ seq serial NOT NULL,
+ id text unique,
+ created_at timestamp without time zone,
+ value text);
+ CREATE TABLE pgdist.pgdist_test20130501(
+ seq serial NOT NULL,
+ id text unique,
+ created_at timestamp without time zone,
+ value text);
+ INSERT INTO pgdist.pgdist_test20130501(id, created_at, value)
+ VALUES ('101', '2013-05-01T01:23:45Z', 'dummy');
+ ]
+ `echo "#{sql}" | psql -U postgres -d pgdist 2>/dev/null`
+ exist_record = [1, '101','2013-05-01T01:23:45Z','dummy']
+ `rm /tmp/pgdist.pgdist_test20130430 2>/dev/null`
+ File.write("/tmp/pgdist.pgdist_test20130501", exist_record.join("\t") + "\n")
+ `rm /tmp/pgdist.pgdist_test20130430.seq 2>/dev/null`
+ File.write("/tmp/pgdist.pgdist_test20130501.seq", "1")
+ end
+
  before :all do
  Fluent::Test.setup
  @input_records = [
  {'id'=>'100','created_at'=>'2013-04-30T01:23:45Z','text'=>'message1'},
  {'id'=>'101','created_at'=>'2013-05-01T01:23:45Z','text'=>'message2'},
- {'id'=>'101','created_at'=>'2013-05-01T01:23:45Z','text'=>'message2'}
+ {'id'=>'101','created_at'=>'2013-05-01T01:23:45Z','text'=>'message2'},
+ {'id'=>'102','created_at'=>'2013-05-01T01:23:46Z','text'=>'message3'}
  ]
- conf = %[
+ tag = "pgdist.test"
+ @default_conf = %[
  host localhost
  username postgres
  password postgres
@@ -17,20 +45,52 @@ table_moniker {|record|t=record["created_at"];"pgdist.pgdist_test"+t[0..3]+t[5..
  insert_filter {|record|[record["id"],record["created_at"],record.to_json]}
  columns id,created_at,value
  values $1,$2,$3
+ file_moniker {|table|"/tmp/"+table}
  ]
- tag = "pgdist.test"
- @driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, tag).configure(conf)
+ @driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(@default_conf)
+ unique_conf = @default_conf + "unique_column id\n"
+ @unique_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(unique_conf)
+ @connection = PG::Connection.new({
+ :host => "localhost", :port => 5432,
+ :user => "postgres", :password => "postgres",
+ :dbname => "pgdist"
+ })
  end

- describe "#table_name(record)" do
- context "when record['created_at'] = '2013-04-30T01:23:45Z'" do
- it "should return 'pgdist.pgdist_test20130430'" do
- result = @driver.instance.table_name(@input_records[0])
- result.should == "pgdist.pgdist_test20130430"
+ after :all do
+ @connection.close
+ end
+
+ describe "#db_escape(str)" do
+ context "when str = nil" do
+ it "should return \\N" do
+ @driver.instance.db_escape(nil).should == "\\N"
+ end
+ end
+
+ context "when str = #{"\\1\a2\b3\n4\r5\t6\\\a\b\n\r\t".inspect}" do
+ it "should return #{"\\\\1\\a2\\b3\\n4\\r5\\t6\\\\\\a\\b\\n\\r\\t".inspect}" do
+ input = "\\1\a2\b3\n4\r5\t6\\\a\b\n\r\t"
+ expected = "\\\\1\\a2\\b3\\n4\\r5\\t6\\\\\\a\\b\\n\\r\\t"
+ @driver.instance.db_escape(input).should == expected
  end
  end
  end

+ describe "#delete_existing_records(handler, table, records)" do
+ before :each do
+ init_test_tables
+ end
+
+ it "should delete existing records" do
+ handler = @unique_driver.instance.client
+ records = @input_records[1..3]
+ @unique_driver.instance.delete_existing_records(handler, "pgdist.pgdist_test20130501", records)
+ records.shift.should == {"id"=>"102", "created_at"=>"2013-05-01T01:23:46Z", "text"=>"message3"}
+ records.size.should == 0
+ end
+ end
+
  describe "#filter_for_insert(record)" do
  context "when record is valid Hash" do
  it "should output valid Array" do
@@ -55,64 +115,253 @@ values $1,$2,$3
  end
  end

- describe "#write(chunk)" do
- before :all do
- @connection = PG::Connection.new({
- :host => "localhost", :port => 5432,
- :user => "postgres", :password => "postgres",
- :dbname => "pgdist"
- })
- sql = <<-EOF
- CREATE SCHEMA pgdist;
- DROP TABLE pgdist.pgdist_test20130430;
- DROP TABLE pgdist.pgdist_test20130501;
- CREATE TABLE pgdist.pgdist_test20130430(
- seq serial NOT NULL,
- id text unique,
- created_at timestamp without time zone,
- value text);
- CREATE TABLE pgdist.pgdist_test20130501(
- seq serial NOT NULL,
- id text unique,
- created_at timestamp without time zone,
- value text);
- EOF
- `echo "#{sql}" | psql -U postgres -d pgdist 2>/dev/null`
+ describe "#table_name(record)" do
+ context "when record['created_at'] = '2013-04-30T01:23:45Z'" do
+ it "should return 'pgdist.pgdist_test20130430'" do
+ result = @driver.instance.table_name(@input_records[0])
+ result.should == "pgdist.pgdist_test20130430"
+ end
  end
+ end

- it "should insert into PostgreSQL" do
- records = @input_records
+ describe "#write(chunk)" do
+ before :all do
  time = Time.utc(2013,4,30,1,23,45).to_i
- records.each do |record|
+ @input_records.each do |record|
  @driver.emit(record, time)
+ @unique_driver.emit(record, time)
+ end
+ end
+
+ before :each do
+ init_test_tables
+ end
+
+ context "when unique_column is not set" do
+ it "should insert into PostgreSQL" do
+ @driver.run
+ result = @connection.exec("select * from pgdist.pgdist_test20130430")
+
+ result.ntuples.should == 1
+
+ record = result[0]
+ record["seq"].should == "1"
+ record["id"].should == "100"
+ record["created_at"].should == "2013-04-30 01:23:45"
+ json = record["value"]
+ hash = JSON.parse(json)
+ hash["id"].should == "100"
+ hash["created_at"].should == "2013-04-30T01:23:45Z"
+ hash["text"].should == "message1"
+
+ result = @connection.exec("select * from pgdist.pgdist_test20130501")
+
+ result.ntuples.should == 2
+
+ record = result[0]
+ record["seq"].should == "1"
+ record["id"].should == "101"
+ record["created_at"].should == "2013-05-01 01:23:45"
+ record["value"].should == "dummy"
+
+ record = result[1]
+ record["seq"].should == "4"
+ record["id"].should == "102"
+ record["created_at"].should == "2013-05-01 01:23:46"
+ json = record["value"]
+ hash = JSON.parse(json)
+ hash["id"].should == "102"
+ hash["created_at"].should == "2013-05-01T01:23:46Z"
+ hash["text"].should == "message3"
+ end
+
+ it "should append to file" do
+ @driver.run
+
+ result = File.read("/tmp/pgdist.pgdist_test20130430.seq").chomp
+ result.should == "1"
+
+ result = File.read("/tmp/pgdist.pgdist_test20130430").split(/\n/).map{|l|l.split(/\t/)}
+ result.shift.should == ["1", "100", "2013-04-30 01:23:45", "{\"id\":\"100\",\"created_at\":\"2013-04-30T01:23:45Z\",\"text\":\"message1\"}"]
+ result.size.should == 0
+
+ result = File.read("/tmp/pgdist.pgdist_test20130501.seq").chomp
+ result.should == "4"
+
+ result = File.read("/tmp/pgdist.pgdist_test20130501").split(/\n/).map{|l|l.split(/\t/)}
+ result.shift.should == ["1", "101", "2013-05-01T01:23:45Z", "dummy"]
+ result.shift.should == ["4", "102", "2013-05-01 01:23:46", "{\"id\":\"102\",\"created_at\":\"2013-05-01T01:23:46Z\",\"text\":\"message3\"}"]
+ result.size.should == 0
+ end
+ end
+
+ context "when unique_column is set" do
+ it "should insert into PostgreSQL" do
+ @unique_driver.run
+ result = @connection.exec("select * from pgdist.pgdist_test20130430")
+
+ result.ntuples.should == 1
+
+ record = result[0]
+ record["seq"].should == "1"
+ record["id"].should == "100"
+ record["created_at"].should == "2013-04-30 01:23:45"
+ json = record["value"]
+ hash = JSON.parse(json)
+ hash["id"].should == "100"
+ hash["created_at"].should == "2013-04-30T01:23:45Z"
+ hash["text"].should == "message1"
+
+ result = @connection.exec("select * from pgdist.pgdist_test20130501")
+
+ result.ntuples.should == 2
+
+ record = result[0]
+ record["seq"].should == "1"
+ record["id"].should == "101"
+ record["created_at"].should == "2013-05-01 01:23:45"
+ record["value"].should == "dummy"
+
+ record = result[1]
+ record["seq"].should == "2"
+ record["id"].should == "102"
+ record["created_at"].should == "2013-05-01 01:23:46"
+ json = record["value"]
+ hash = JSON.parse(json)
+ hash["id"].should == "102"
+ hash["created_at"].should == "2013-05-01T01:23:46Z"
+ hash["text"].should == "message3"
+ end
+
+ it "should append to file" do
+ @unique_driver.run
+
+ result = File.read("/tmp/pgdist.pgdist_test20130430.seq").chomp
+ result.should == "1"
+
+ result = File.read("/tmp/pgdist.pgdist_test20130430").split(/\n/).map{|l|l.split(/\t/)}
+ result.shift.should == ["1", "100", "2013-04-30 01:23:45", "{\"id\":\"100\",\"created_at\":\"2013-04-30T01:23:45Z\",\"text\":\"message1\"}"]
+ result.size.should == 0
+
+ result = File.read("/tmp/pgdist.pgdist_test20130501.seq").chomp
+ result.should == "2"
+
+ result = File.read("/tmp/pgdist.pgdist_test20130501").split(/\n/).map{|l|l.split(/\t/)}
+
+ result.shift.should == ["1", "101", "2013-05-01T01:23:45Z", "dummy"]
+ result.shift.should == ["2", "102", "2013-05-01 01:23:46", "{\"id\":\"102\",\"created_at\":\"2013-05-01T01:23:46Z\",\"text\":\"message3\"}"]
+ result.size.should == 0
  end
- @driver.run
- result = @connection.exec("select * from pgdist.pgdist_test20130430")
-
- record = result[0]
- record["seq"].should == "1"
- record["id"].should == "100"
- record["created_at"].should == "2013-04-30 01:23:45"
- json = record["value"]
- hash = JSON.parse(json)
- hash["id"].should == "100"
- hash["created_at"].should == "2013-04-30T01:23:45Z"
- hash["text"].should == "message1"
-
- result = @connection.exec("select * from pgdist.pgdist_test20130501")
-
- record = result[0]
- record["seq"].should == "1"
- record["id"].should == "101"
- record["created_at"].should == "2013-05-01 01:23:45"
- json = record["value"]
- hash = JSON.parse(json)
- hash["id"].should == "101"
- hash["created_at"].should == "2013-05-01T01:23:45Z"
- hash["text"].should == "message2"
-
- @connection.close
  end
  end
- end

+ describe "#write_pg_result(output_stream, fields, pg_result)" do
+ before :all do
+ @pg_result = @connection.exec(%[
+ (select 1 as seq, '100' as id, E'test\\ndata1' as value)
+ union
+ (select 2 as seq, '101' as id, E'test\\ndata2' as value)
+ ])
+ end
+
+ before :each do
+ pipe = IO.pipe
+ @output_reader = pipe[0]
+ @output_writer = pipe[1]
+ end
+
+ after :each do
+ @output_reader.close unless @output_reader.closed?
+ @output_writer.close unless @output_writer.closed?
+ end
+
+ context "when file_format = json" do
+ before :each do
+ json_conf = @default_conf + %[
+ file_format json
+ file_record_filter {|r|{"seq"=>r["seq"],"id"=>r["id"],"text"=>r["value"]}}
+ ]
+ @json_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(json_conf)
+ end
+
+ it "should output json data" do
+ @json_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+ @output_writer.close
+ result = @output_reader.gets.chomp
+ result = JSON.parse(result)
+ result.should == {"seq"=>"1", "id"=>"100", "text"=>"test\ndata1"}
+ result = @output_reader.gets.chomp
+ result = JSON.parse(result)
+ result.should == {"seq"=>"2", "id"=>"101", "text"=>"test\ndata2"}
+ end
+ end
+
+ context "when file_format = ltsv" do
+ before :each do
+ ltsv_conf = @default_conf + %[
+ file_format ltsv
+ file_record_filter {|f,r|[["seq","id","text"],[r[0],r[1],r[2]]]}
+ ]
+ @ltsv_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(ltsv_conf)
+ end
+
+ it "should output ltsv data" do
+ @ltsv_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+ @output_writer.close
+ result = @output_reader.gets.chomp
+ result = Hash[result.split(/\t/).map{|p|p.split(/:/, 2)}]
+ result.should == {"seq"=>"1", "id"=>"100", "text"=>"test\\ndata1"}
+ result = @output_reader.gets.chomp
+ result = Hash[result.split(/\t/).map{|p|p.split(/:/, 2)}]
+ result.should == {"seq"=>"2", "id"=>"101", "text"=>"test\\ndata2"}
+ @output_reader.gets.should be_nil
+ end
+ end
+
+ context "when file_format = msgpack" do
+ before :each do
+ msgpack_conf = @default_conf + %[
+ file_format msgpack
+ file_record_filter {|r|{"seq"=>r["seq"],"id"=>r["id"],"text"=>r["value"]}}
+ ]
+ @msgpack_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(msgpack_conf)
+ end
+
+ it "should output message_pack data" do
+ @msgpack_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+ @output_writer.close
+ unpacker = MessagePack::Unpacker.new(@output_reader)
+ result = []
+ begin
+ unpacker.each do |record|
+ result.push record
+ end
+ rescue EOFError => e
+ end
+ result.shift.should == {"seq"=>"1", "id"=>"100", "text"=>"test\ndata1"}
+ result.shift.should == {"seq"=>"2", "id"=>"101", "text"=>"test\ndata2"}
+ end
+ end
+
+ context "when file_format = raw" do
+ before :each do
+ tsv_conf = @default_conf + "file_format raw\n"
+ @tsv_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(tsv_conf)
+ end
+ it "should output tsv data" do
+ @tsv_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+ @output_writer.close
+ result = @output_reader.gets.chomp
+ result = result.split(/\t/)
+ result.should == ["1", "100", "test\\ndata1"]
+ result = @output_reader.gets.chomp
+ result = result.split(/\t/)
+ result.should == ["2", "101", "test\\ndata2"]
+ @output_reader.gets.should be_nil
+ end
+ end
+ end
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-pgdist
  version: !ruby/object:Gem::Version
- version: 0.1.1
+ version: 0.2.0
  prerelease:
  platform: ruby
  authors:
@@ -9,40 +9,40 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-05-01 00:00:00.000000000 Z
+ date: 2013-05-02 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: fluentd
  requirement: !ruby/object:Gem::Requirement
  none: false
  requirements:
- - - ! '>='
+ - - ~>
  - !ruby/object:Gem::Version
- version: '0'
+ version: 0.10.33
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  none: false
  requirements:
- - - ! '>='
+ - - ~>
  - !ruby/object:Gem::Version
- version: '0'
+ version: 0.10.33
  - !ruby/object:Gem::Dependency
  name: pg
  requirement: !ruby/object:Gem::Requirement
  none: false
  requirements:
- - - ! '>='
+ - - ~>
  - !ruby/object:Gem::Version
- version: '0'
+ version: 0.15.1
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  none: false
  requirements:
- - - ! '>='
+ - - ~>
  - !ruby/object:Gem::Version
- version: '0'
+ version: 0.15.1
  - !ruby/object:Gem::Dependency
  name: rspec
  requirement: !ruby/object:Gem::Requirement
@@ -237,7 +237,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
  version: '0'
  segments:
  - 0
- hash: 4199809916756292803
+ hash: -910513567270313927
  required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
  requirements: