fluent-plugin-pgdist 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -6,8 +6,8 @@ source "http://rubygems.org"
  # Add dependencies to develop your gem here.
  # Include everything needed to run rake, tests, features, etc.

- gem "fluentd"
- gem "pg"
+ gem "fluentd", "~> 0.10.33"
+ gem "pg", "~> 0.15.1"

  group :development do
    gem "rspec", "~> 2.12.0"
data/README.md CHANGED
@@ -7,7 +7,7 @@ fluent-plugin-pgdist is a fluentd plugin for distribute insert into PostgreSQL.
      # gem install fluentd
      # gem install fluent-plugin-pgdist

- ## Usage
+ ## Usage: Insert into PostgreSQL table

  Define a match directive for pgdist in the fluentd config file (e.g. fluent.conf):

@@ -41,16 +41,125 @@ Input data:
  Check table data:

      $ echo 'select * from pgdist_test20130430' | psql -U postgres -d pgdist
-      id | created_at | value
+       id  |     created_at      |                               value
      -----+---------------------+--------------------------------------------------------------------
       100 | 2013-04-30 01:23:45 | {"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}
      (1 row)
      $ echo 'select * from pgdist_test20130501' | psql -U postgres -d pgdist
-      id | created_at | value
+       id  |     created_at      |                               value
      -----+---------------------+--------------------------------------------------------------------
-      100 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
+      101 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
      (1 row)

+ ## Usage: Insert into PostgreSQL with a unique constraint
+
+ Define a match directive for pgdist in the fluentd config file (e.g. fluent.conf):
+
+     <match pgdist.input>
+       type pgdist
+       host localhost
+       username postgres
+       password postgres
+       database pgdist
+       table_moniker {|record|t=record["created_at"];"pgdist_test"+t[0..3]+t[5..6]+t[8..9]}
+       insert_filter {|record|[record["id"],record["created_at"],record.to_json]}
+       columns id,created_at,value
+       values $1,$2,$3
+       raise_exception false
+       unique_column id
+     </match>
+
+ Run fluentd:
+
+     $ fluentd -c fluent.conf &
+
+ Create the output tables:
+
+     $ echo 'CREATE TABLE pgdist_test20130430(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+     $ echo 'CREATE TABLE pgdist_test20130501(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+
+ Input data:
+
+     $ echo '{"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}' | fluent-cat pgdist.input
+     $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+     $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+     $ echo '{"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}' | fluent-cat pgdist.input
+
+ Check table data:
+
+     $ echo 'select * from pgdist_test20130430' | psql -U postgres -d pgdist
+      seq | id  |     created_at      |                               value
+     -----+-----+---------------------+--------------------------------------------------------------------
+        1 | 100 | 2013-04-30 01:23:45 | {"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}
+     (1 row)
+     $ echo 'select * from pgdist_test20130501' | psql -U postgres -d pgdist
+      seq | id  |     created_at      |                               value
+     -----+-----+---------------------+--------------------------------------------------------------------
+        1 | 101 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
+        2 | 102 | 2013-05-01 01:23:46 | {"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}
+     (2 rows)
+
+ ## Usage: Insert into PostgreSQL and an LTSV file
+
+ Define a match directive for pgdist in the fluentd config file (e.g. fluent.conf):
+
+     <match pgdist.input>
+       type pgdist
+       host localhost
+       username postgres
+       password postgres
+       database pgdist
+       table_moniker {|record|t=record["created_at"];"pgdist_test"+t[0..3]+t[5..6]+t[8..9]}
+       insert_filter {|record|[record["id"],record["created_at"],record.to_json]}
+       columns id,created_at,value
+       values $1,$2,$3
+       raise_exception false
+       unique_column id
+       file_moniker {|table|"/tmp/"+table}
+       file_format ltsv
+       file_record_filter {|f,r|h=JSON.parse(r["value"]);[["seq","id","created_at","text"],[r["seq"],r["id"],r["created_at"],h["text"]]]}
+       sequence_moniker {|table|"/tmp/"+table+".seq"}
+       sequence_column seq
+     </match>
+
+ Run fluentd:
+
+     $ fluentd -c fluent.conf &
+
+ Create the output tables:
+
+     $ echo 'CREATE TABLE pgdist_test20130430(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+     $ echo 'CREATE TABLE pgdist_test20130501(seq serial, id text unique, created_at timestamp without time zone, value text);' | psql -U postgres -d pgdist
+
+ Input data:
+
+     $ echo '{"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}' | fluent-cat pgdist.input
+     $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+     $ echo '{"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}' | fluent-cat pgdist.input
+     $ echo '{"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}' | fluent-cat pgdist.input
+
+ Check table data:
+
+     $ echo 'select * from pgdist_test20130430' | psql -U postgres -d pgdist
+      seq | id  |     created_at      |                               value
+     -----+-----+---------------------+--------------------------------------------------------------------
+        1 | 100 | 2013-04-30 01:23:45 | {"id":"100","created_at":"2013-04-30T01:23:45Z","text":"message1"}
+     (1 row)
+     $ echo 'select * from pgdist_test20130501' | psql -U postgres -d pgdist
+      seq | id  |     created_at      |                               value
+     -----+-----+---------------------+--------------------------------------------------------------------
+        1 | 101 | 2013-05-01 01:23:45 | {"id":"101","created_at":"2013-05-01T01:23:45Z","text":"message2"}
+        2 | 102 | 2013-05-01 01:23:46 | {"id":"102","created_at":"2013-05-01T01:23:46Z","text":"message3"}
+     (2 rows)
+
+ Check file data:
+
+     $ cat /tmp/pgdist_test20130430
+     seq:1	id:100	created_at:2013-04-30 01:23:45	text:message1
+     $ cat /tmp/pgdist_test20130501
+     seq:1	id:101	created_at:2013-05-01 01:23:45	text:message2
+     seq:2	id:102	created_at:2013-05-01 01:23:46	text:message3
+
  ## Parameter

  * host
@@ -73,6 +182,18 @@ Check table data:
73
182
  * Column values in insert SQL
74
183
  * raise_exception
75
184
  * Flag to enable/disable exception in insert
185
+ * unique_column
186
+ * Column name with unique constraint
187
+ * file_moniker
188
+ * Ruby script that returns the output file name of each table
189
+ * file_format
190
+ * Output file format. json/ltsv/msgpack/tsv format is available.
191
+ * file_record_filter
192
+ * Ruby script to convert record for json/ltsv/msgpack file. This filter receives hash in json/msgpack format, [fields, values] in ltsv format.
193
+ * sequnece_column
194
+ * Sequence column name in PostgreSQL table
195
+ * sequence_moniker
196
+ * Ruby script that returns the sequence file name of each table
76
197
 
77
198
  ## Contributing to fluent-plugin-pgdist
78
199
 
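The `table_moniker` and `insert_filter` strings in the examples above are Ruby block literals; the plugin source below evals them into lambdas at configure time. A minimal standalone sketch of that mechanism, with a made-up record for illustration:

    require "json"

    # Sketch only: the plugin builds callables via eval("lambda" + config_string).
    table_moniker = '{|record|t=record["created_at"];"pgdist_test"+t[0..3]+t[5..6]+t[8..9]}'
    insert_filter = '{|record|[record["id"],record["created_at"],record.to_json]}'
    table_moniker_lambda = eval("lambda#{table_moniker}")
    insert_filter_lambda = eval("lambda#{insert_filter}")

    record = {"id" => "100", "created_at" => "2013-04-30T01:23:45Z", "text" => "message1"}
    puts table_moniker_lambda.call(record)  # => pgdist_test20130430 (routes to a per-day table)
    p insert_filter_lambda.call(record)     # => ["100", "2013-04-30T01:23:45Z", record.to_json] for $1,$2,$3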
data/VERSION CHANGED
@@ -1 +1 @@
- 0.1.1
+ 0.2.0
@@ -5,11 +5,11 @@

  Gem::Specification.new do |s|
    s.name = "fluent-plugin-pgdist"
-   s.version = "0.1.1"
+   s.version = "0.2.0"

    s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
    s.authors = ["Kenji Hara"]
-   s.date = "2013-05-01"
+   s.date = "2013-05-02"
    s.description = "Fluentd plugin for distribute insert into PostgreSQL"
    s.email = "haracane@gmail.com"
    s.extra_rdoc_files = [
@@ -39,8 +39,8 @@ Gem::Specification.new do |s|
    s.specification_version = 3

    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
-     s.add_runtime_dependency(%q<fluentd>, [">= 0"])
-     s.add_runtime_dependency(%q<pg>, [">= 0"])
+     s.add_runtime_dependency(%q<fluentd>, ["~> 0.10.33"])
+     s.add_runtime_dependency(%q<pg>, ["~> 0.15.1"])
      s.add_development_dependency(%q<rspec>, ["~> 2.12.0"])
      s.add_development_dependency(%q<yard>, ["~> 0.8.3"])
      s.add_development_dependency(%q<redcarpet>, ["~> 2.2.2"])
@@ -52,8 +52,8 @@ Gem::Specification.new do |s|
      s.add_development_dependency(%q<ci_reporter>, ["~> 1.8.3"])
      s.add_development_dependency(%q<flog>, ["~> 3.2.1"])
    else
-     s.add_dependency(%q<fluentd>, [">= 0"])
-     s.add_dependency(%q<pg>, [">= 0"])
+     s.add_dependency(%q<fluentd>, ["~> 0.10.33"])
+     s.add_dependency(%q<pg>, ["~> 0.15.1"])
      s.add_dependency(%q<rspec>, ["~> 2.12.0"])
      s.add_dependency(%q<yard>, ["~> 0.8.3"])
      s.add_dependency(%q<redcarpet>, ["~> 2.2.2"])
@@ -66,8 +66,8 @@ Gem::Specification.new do |s|
      s.add_dependency(%q<flog>, ["~> 3.2.1"])
    end
  else
-   s.add_dependency(%q<fluentd>, [">= 0"])
-   s.add_dependency(%q<pg>, [">= 0"])
+   s.add_dependency(%q<fluentd>, ["~> 0.10.33"])
+   s.add_dependency(%q<pg>, ["~> 0.15.1"])
    s.add_dependency(%q<rspec>, ["~> 2.12.0"])
    s.add_dependency(%q<yard>, ["~> 0.8.3"])
    s.add_dependency(%q<redcarpet>, ["~> 2.2.2"])
@@ -13,60 +13,193 @@ class Fluent::PgdistOutput < Fluent::BufferedOutput
    config_param :table_moniker, :string, :default => nil
    config_param :insert_filter, :string, :default => nil
    config_param :raise_exception, :bool, :default => false
+   config_param :insert_columns, :string, :default => nil
    config_param :columns, :string, :default => nil
+   config_param :insert_values, :string, :default => nil
    config_param :values, :string, :default => nil
+   config_param :unique_column, :string, :default => nil

-   config_param :format, :string, :default => "raw" # or json
+   config_param :sequence_moniker, :string, :default => nil
+
+   config_param :file_moniker, :string, :default => nil
+   config_param :file_record_filter, :string, :default => nil
+   config_param :file_format, :string, :default => nil
+   config_param :file_write_limit, :integer, :default => 10000
+   config_param :sequence_column, :string, :default => "seq"

    attr_accessor :handler

-   def initialize
-     super
-     require 'pg'
+   def client
+     PG::Connection.new({
+       :host => @host, :port => @port,
+       :user => @username, :password => @password,
+       :dbname => @database
+     })
    end

-   # We don't currently support mysql's analogous json format
    def configure(conf)
      super

-     if @columns.nil?
-       raise Fluent::ConfigError, "columns MUST be specified, but missing"
+     @insert_columns ||= @columns
+     @insert_values ||= @values
+
+     if @insert_columns.nil?
+       raise Fluent::ConfigError, "columns MUST be specified, but missing"
      end

      @table_moniker_lambda = eval("lambda#{@table_moniker}")
      @insert_filter_lambda = eval("lambda#{@insert_filter}")
+     if @file_moniker
+       @file_moniker_lambda = eval("lambda#{@file_moniker}")
+       @sequence_moniker ||= '{|table|"/tmp/#{table}.seq"}'
+       case @file_format
+       when "json", "msgpack", "message_pack"
+         @file_record_filter ||= '{|record|record}'
+       when "ltsv"
+         @file_record_filter ||= '{|fields,record|[fields,record]}'
+       else
+       end
+     end
+     @file_record_filter_lambda = eval("lambda#{@file_record_filter}") if @file_record_filter
+     @sequence_moniker_lambda = eval("lambda#{@sequence_moniker}") if @sequence_moniker
      self
    end

-   def start
-     super
+   DB_ESCAPE_PATTERN = Regexp.new("[\\\\\\a\\b\\n\\r\\t]")
+
+   def db_escape(str)
+     return "\\N" if str.nil?
+     rest = str
+     ret = ''
+     while match_data = DB_ESCAPE_PATTERN.match(rest)
+       ret += match_data.pre_match
+       code = match_data[0]
+       rest = match_data.post_match
+       case code
+       when '\\'
+         ret += '\\\\'
+       when "\a"
+         ret += "\\a"
+       when "\b"
+         ret += "\\b"
+       when "\n"
+         ret += "\\n"
+       when "\r"
+         ret += "\\r"
+       when "\t"
+         ret += "\\t"
+       end
+     end
+     return ret + rest
    end

-   def shutdown
-     super
+   def delete_duplicative_records(records)
+     records.uniq!{|r|r[@unique_column]}
    end

-   def format(tag, time, record)
-     [tag, time, record].to_msgpack
+   def delete_existing_records(handler, table, records)
+     unique_values = records.map{|r|r[@unique_column]}
+     if unique_values != []
+       where_sql = "where " + 1.upto(unique_values.size).map{|i|"#{@unique_column} = \$#{i}"}.join(" or ")
+       handler.prepare("select_#{table}", "select #{@unique_column} from #{table} #{where_sql}")
+       result = handler.exec_prepared("select_#{table}", unique_values)
+       exist_values = result.column_values(0)
+       return if exist_values.size == 0
+       $log.info "delete #{exist_values.size} duplicative records for #{table}"
+       records.reject!{|r|exist_values.include?(r[@unique_column])}
+     end
    end

-   def client
-     pgconn = PG::Connection.new({
-       :host => @host, :port => @port,
-       :user => @username, :password => @password,
-       :dbname => @database
-     })
-     return pgconn
+   def file_path(table)
+     @file_moniker_lambda.call(table)
    end

-   def table_name(record)
-     @table_moniker_lambda.call(record)
+   def filter_for_file_record(*args)
+     result = @file_record_filter_lambda.call(*args)
+     return result
    end

    def filter_for_insert(record)
      @insert_filter_lambda.call(record)
    end

+   def sequence_path(table)
+     @sequence_moniker_lambda.call(table)
+   end
+
+   def format(tag, time, record)
+     [tag, time, record].to_msgpack
+   end
+
+   def initialize
+     super
+     require 'pg'
+   end
+
+   def insert_into_db(handler, table, records)
+     if @unique_column
+       delete_duplicative_records(records)
+       delete_existing_records(handler, table, records)
+     end
+
+     $log.info "insert #{records.size} records into #{table}"
+     sql = "INSERT INTO #{table}(#{@insert_columns}) VALUES(#{@insert_values})"
+     $log.info "execute sql #{sql.inspect}"
+     statement = "write_#{table}"
+     handler.prepare(statement, sql)
+     records.each do |record|
+       record = filter_for_insert(record)
+       $log.info "insert #{record.inspect}"
+       begin
+         handler.exec_prepared(statement, record)
+       rescue Exception => e
+         if @raise_exception
+           raise e
+         else
+           $log.info e.message
+         end
+       end
+     end
+   end
+
+   def read_last_sequence(filepath)
+     last_sequence = File.read(filepath).chomp
+     last_sequence = nil if /-?[0-9]+/ !~ last_sequence
+     return last_sequence
+   end
+
+   def read_last_sequence_from_file(handler, table, filepath)
+     last_line = `tail -n 1 #{filepath}`
+     case @file_format
+     when "json"
+       last_record = JSON.parse(last_line)
+       last_sequence = last_record[@sequence_column]
+     when "ltsv"
+       last_record = Hash[last_line.split(/\t/).map{|p|p.split(/:/, 2)}]
+       last_sequence = last_record[@sequence_column]
+     else
+       result = handler.exec("select * from #{table} limit 0")
+       fields = result.fields
+       sequence_index = fields.index(@sequence_column)
+       last_record = last_line.split(/\t/)
+       last_sequence = last_record[sequence_index]
+     end
+     last_sequence = nil if /-?[0-9]+/ !~ last_sequence
+     return last_sequence
+   end
+
+   def shutdown
+     super
+   end
+
+   def start
+     super
+   end
+
+   def table_name(record)
+     @table_moniker_lambda.call(record)
+   end
+
    def write(chunk)
      handler = self.client
      records_hash = {}
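When `unique_column` is set, `insert_into_db` above drops duplicates in two passes: `delete_duplicative_records` removes in-batch duplicates, then `delete_existing_records` removes records whose key is already in the table. A rough standalone illustration of those two passes (the `existing_ids` array stands in for the prepared SELECT the plugin actually runs):

    unique_column = "id"
    existing_ids = ["101"]  # stand-in for ids the prepared SELECT finds in the table

    records = [
      {"id" => "101", "text" => "message2"},
      {"id" => "101", "text" => "message2"},  # in-batch duplicate
      {"id" => "102", "text" => "message3"},
    ]

    records.uniq! { |r| r[unique_column] }                           # delete_duplicative_records
    records.reject! { |r| existing_ids.include?(r[unique_column]) }  # delete_existing_records
    p records  # => [{"id"=>"102", "text"=>"message3"}]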
@@ -78,25 +211,55 @@ class Fluent::PgdistOutput < Fluent::BufferedOutput
        end
      }
      records_hash.each_pair do |table, records|
-       $log.info "insert #{records.size} records into #{table}"
-       sql = "INSERT INTO #{table}(#{@columns}) VALUES(#{@values})"
-       $log.info "execute sql #{sql.inspect}"
-       statement = "write_#{table}"
-       handler.prepare(statement, sql)
-       records.each do |record|
-         record = filter_for_insert(record)
-         $log.info "insert #{record.inspect}"
-         begin
-           handler.exec_prepared(statement, record)
-         rescue Exception => e
-           if @raise_exception
-             raise e
-           else
-             $log.info e.message
-           end
-         end
-       end
+       insert_into_db(handler, table, records)
+       write_to_file(handler, table) if @file_moniker
      end
      handler.close
    end
+
+   def write_pg_result(output_stream, fields, pg_result)
+     case @file_format
+     when "json"
+       pg_result.each do |tuple|
+         tuple = filter_for_file_record(tuple)
+         output_stream.puts(tuple.to_json)
+       end
+     when "ltsv"
+       pg_result.each_row do |row|
+         fields, row = filter_for_file_record(fields, row)
+         output_stream.puts(fields.each_with_index.map{|f,i|"#{f}:#{db_escape(row[i])}"}.join("\t"))
+       end
+     when "msgpack", "message_pack"
+       pg_result.each do |tuple|
+         tuple = filter_for_file_record(tuple)
+         output_stream.write(tuple.to_msgpack)
+       end
+     else
+       pg_result.each_row do |row|
+         output_stream.puts(row.map{|v|db_escape(v)}.join("\t"))
+       end
+     end
+   end
+
+   def write_to_file(handler, table)
+     sequence_file_path = sequence_path(table)
+     last_sequence = read_last_sequence(sequence_file_path) if File.exists?(sequence_file_path)
+     file = nil
+     while true
+       where_sql = "where #{last_sequence} < #{@sequence_column}" if last_sequence
+       result = handler.exec("select * from #{table} #{where_sql} order by #{@sequence_column} limit #{@file_write_limit}")
+       result_size = result.ntuples
+       break if result_size == 0
+
+       fields ||= result.fields
+       file ||= File.open(file_path(table), "a")
+       write_pg_result(file, fields, result)
+       last_sequence = result[result_size-1][@sequence_column]
+       break if result_size < @file_write_limit
+     end
+     if file
+       file.close
+       File.write(sequence_file_path, last_sequence.to_s) if last_sequence
+     end
+   end
  end
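`write_to_file` implements a simple checkpointed export: the last exported sequence value lives in a side file, and each flush selects rows past that checkpoint in `file_write_limit`-sized batches, appends them to the output file, and advances the checkpoint. A condensed sketch of that loop, assuming `handler` is an open `PG::Connection` and using the raw/tsv output branch (function and paths below are illustrative, not plugin API):

    # Sketch of the loop in #write_to_file, using the default-style monikers.
    def export_new_rows(handler, table, seq_column, limit = 10_000)
      seq_file = "/tmp/#{table}.seq"   # default sequence_moniker result
      out_path = "/tmp/#{table}"       # example file_moniker result
      last_seq = File.read(seq_file).chomp if File.exist?(seq_file)
      out = nil
      loop do
        where = last_seq ? "where #{last_seq} < #{seq_column}" : ""
        result = handler.exec(
          "select * from #{table} #{where} order by #{seq_column} limit #{limit}")
        break if result.ntuples == 0
        out ||= File.open(out_path, "a")
        result.each_row { |row| out.puts(row.join("\t")) }  # raw/tsv branch, unescaped
        last_seq = result[result.ntuples - 1][seq_column]   # remember newest exported row
        break if result.ntuples < limit                     # no full batch left
      end
      return unless out
      out.close
      File.write(seq_file, last_seq.to_s) if last_seq       # advance the checkpoint
    end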
@@ -1,14 +1,42 @@
  require "spec_helper"

  describe Fluent::PgdistOutput do
+   def init_test_tables
+     sql = %[
+       CREATE SCHEMA pgdist;
+       DROP TABLE pgdist.pgdist_test20130430;
+       DROP TABLE pgdist.pgdist_test20130501;
+       CREATE TABLE pgdist.pgdist_test20130430(
+         seq serial NOT NULL,
+         id text unique,
+         created_at timestamp without time zone,
+         value text);
+       CREATE TABLE pgdist.pgdist_test20130501(
+         seq serial NOT NULL,
+         id text unique,
+         created_at timestamp without time zone,
+         value text);
+       INSERT INTO pgdist.pgdist_test20130501(id, created_at, value)
+         VALUES ('101', '2013-05-01T01:23:45Z', 'dummy');
+     ]
+     `echo "#{sql}" | psql -U postgres -d pgdist 2>/dev/null`
+     exist_record = [1, '101', '2013-05-01T01:23:45Z', 'dummy']
+     `rm /tmp/pgdist.pgdist_test20130430 2>/dev/null`
+     File.write("/tmp/pgdist.pgdist_test20130501", exist_record.join("\t") + "\n")
+     `rm /tmp/pgdist.pgdist_test20130430.seq 2>/dev/null`
+     File.write("/tmp/pgdist.pgdist_test20130501.seq", "1")
+   end
+
    before :all do
      Fluent::Test.setup
      @input_records = [
        {'id'=>'100','created_at'=>'2013-04-30T01:23:45Z','text'=>'message1'},
        {'id'=>'101','created_at'=>'2013-05-01T01:23:45Z','text'=>'message2'},
-       {'id'=>'101','created_at'=>'2013-05-01T01:23:45Z','text'=>'message2'}
+       {'id'=>'101','created_at'=>'2013-05-01T01:23:45Z','text'=>'message2'},
+       {'id'=>'102','created_at'=>'2013-05-01T01:23:46Z','text'=>'message3'}
      ]
-     conf = %[
+     tag = "pgdist.test"
+     @default_conf = %[
  host localhost
  username postgres
  password postgres
@@ -17,20 +45,52 @@ table_moniker {|record|t=record["created_at"];"pgdist.pgdist_test"+t[0..3]+t[5..
  insert_filter {|record|[record["id"],record["created_at"],record.to_json]}
  columns id,created_at,value
  values $1,$2,$3
+ file_moniker {|table|"/tmp/"+table}
  ]
-     tag = "pgdist.test"
-     @driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, tag).configure(conf)
+     @driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(@default_conf)
+     unique_conf = @default_conf + "unique_column id\n"
+     @unique_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(unique_conf)
+     @connection = PG::Connection.new({
+       :host => "localhost", :port => 5432,
+       :user => "postgres", :password => "postgres",
+       :dbname => "pgdist"
+     })
    end

-   describe "#table_name(record)" do
-     context "when record['created_at'] = '2013-04-30T01:23:45Z'" do
-       it "should return 'pgdist.pgdist_test20130430'" do
-         result = @driver.instance.table_name(@input_records[0])
-         result.should == "pgdist.pgdist_test20130430"
+   after :all do
+     @connection.close
+   end
+
+   describe "#db_escape(str)" do
+     context "when str = nil" do
+       it "should return \\N" do
+         @driver.instance.db_escape(nil).should == "\\N"
+       end
+     end
+
+     context "when str = #{"\\1\a2\b3\n4\r5\t6\\\a\b\n\r\t".inspect}" do
+       it "should return #{"\\\\1\\a2\\b3\\n4\\r5\\t6\\\\\\a\\b\\n\\r\\t".inspect}" do
+         input = "\\1\a2\b3\n4\r5\t6\\\a\b\n\r\t"
+         expected = "\\\\1\\a2\\b3\\n4\\r5\\t6\\\\\\a\\b\\n\\r\\t"
+         @driver.instance.db_escape(input).should == expected
        end
      end
    end

+   describe "#delete_existing_records(handler, table, record)" do
+     before :each do
+       init_test_tables
+     end
+
+     it "should delete existing records" do
+       handler = @unique_driver.instance.client
+       records = @input_records[1..3]
+       @unique_driver.instance.delete_existing_records(handler, "pgdist.pgdist_test20130501", records)
+       records.shift.should == {"id"=>"102", "created_at"=>"2013-05-01T01:23:46Z", "text"=>"message3"}
+       records.size.should == 0
+     end
+   end
+
    describe "#filter_for_insert(record)" do
      context "when record is valid Hash" do
        it "should output valid Array" do
@@ -55,64 +115,253 @@ values $1,$2,$3
      end
    end

-   describe "#write(chunk)" do
-     before :all do
-       @connection = PG::Connection.new({
-         :host => "localhost", :port => 5432,
-         :user => "postgres", :password => "postgres",
-         :dbname => "pgdist"
-       })
-       sql = <<-EOF
-         CREATE SCHEMA pgdist;
-         DROP TABLE pgdist.pgdist_test20130430;
-         DROP TABLE pgdist.pgdist_test20130501;
-         CREATE TABLE pgdist.pgdist_test20130430(
-           seq serial NOT NULL,
-           id text unique,
-           created_at timestamp without time zone,
-           value text);
-         CREATE TABLE pgdist.pgdist_test20130501(
-           seq serial NOT NULL,
-           id text unique,
-           created_at timestamp without time zone,
-           value text);
-       EOF
-       `echo "#{sql}" | psql -U postgres -d pgdist 2>/dev/null`
+   describe "#table_name(record)" do
+     context "when record['created_at'] = '2013-04-30T01:23:45Z'" do
+       it "should return 'pgdist.pgdist_test20130430'" do
+         result = @driver.instance.table_name(@input_records[0])
+         result.should == "pgdist.pgdist_test20130430"
+       end
      end
+   end

-     it "should insert into PostgreSQL" do
-       records = @input_records
+   describe "#write(chunk)" do
+     before :all do
        time = Time.utc(2013,4,30,1,23,45).to_i
-       records.each do |record|
+       @input_records.each do |record|
          @driver.emit(record, time)
+         @unique_driver.emit(record, time)
+       end
+     end
+
+     before :each do
+       init_test_tables
+     end
+
+     context "when unique_column is not set" do
+       it "should insert into PostgreSQL" do
+         @driver.run
+         result = @connection.exec("select * from pgdist.pgdist_test20130430")
+
+         result.ntuples.should == 1
+
+         record = result[0]
+         record["seq"].should == "1"
+         record["id"].should == "100"
+         record["created_at"].should == "2013-04-30 01:23:45"
+         json = record["value"]
+         hash = JSON.parse(json)
+         hash["id"].should == "100"
+         hash["created_at"].should == "2013-04-30T01:23:45Z"
+         hash["text"].should == "message1"
+
+         result = @connection.exec("select * from pgdist.pgdist_test20130501")
+
+         result.ntuples.should == 2
+
+         record = result[0]
+         record["seq"].should == "1"
+         record["id"].should == "101"
+         record["created_at"].should == "2013-05-01 01:23:45"
+         record["value"].should == "dummy"
+
+         record = result[1]
+         record["seq"].should == "4"
+         record["id"].should == "102"
+         record["created_at"].should == "2013-05-01 01:23:46"
+         json = record["value"]
+         hash = JSON.parse(json)
+         hash["id"].should == "102"
+         hash["created_at"].should == "2013-05-01T01:23:46Z"
+         hash["text"].should == "message3"
+       end
+
+       it "should append to file" do
+         @driver.run
+
+         result = File.read("/tmp/pgdist.pgdist_test20130430.seq").chomp
+         result.should == "1"
+
+         result = File.read("/tmp/pgdist.pgdist_test20130430").split(/\n/).map{|l|l.split(/\t/)}
+         result.shift.should == ["1", "100", "2013-04-30 01:23:45", "{\"id\":\"100\",\"created_at\":\"2013-04-30T01:23:45Z\",\"text\":\"message1\"}"]
+         result.size.should == 0
+
+         result = File.read("/tmp/pgdist.pgdist_test20130501.seq").chomp
+         result.should == "4"
+
+         result = File.read("/tmp/pgdist.pgdist_test20130501").split(/\n/).map{|l|l.split(/\t/)}
+         result.shift.should == ["1", "101", "2013-05-01T01:23:45Z", "dummy"]
+         result.shift.should == ["4", "102", "2013-05-01 01:23:46", "{\"id\":\"102\",\"created_at\":\"2013-05-01T01:23:46Z\",\"text\":\"message3\"}"]
+         result.size.should == 0
+       end
+     end
+
+     context "when unique_column is set" do
+       it "should insert into PostgreSQL" do
+         @unique_driver.run
+         result = @connection.exec("select * from pgdist.pgdist_test20130430")
+
+         result.ntuples.should == 1
+
+         record = result[0]
+         record["seq"].should == "1"
+         record["id"].should == "100"
+         record["created_at"].should == "2013-04-30 01:23:45"
+         json = record["value"]
+         hash = JSON.parse(json)
+         hash["id"].should == "100"
+         hash["created_at"].should == "2013-04-30T01:23:45Z"
+         hash["text"].should == "message1"
+
+         result = @connection.exec("select * from pgdist.pgdist_test20130501")
+
+         result.ntuples.should == 2
+
+         record = result[0]
+         record["seq"].should == "1"
+         record["id"].should == "101"
+         record["created_at"].should == "2013-05-01 01:23:45"
+         record["value"].should == "dummy"
+
+         record = result[1]
+         record["seq"].should == "2"
+         record["id"].should == "102"
+         record["created_at"].should == "2013-05-01 01:23:46"
+         json = record["value"]
+         hash = JSON.parse(json)
+         hash["id"].should == "102"
+         hash["created_at"].should == "2013-05-01T01:23:46Z"
+         hash["text"].should == "message3"
+       end
+
+       it "should append to file" do
+         @unique_driver.run
+
+         result = File.read("/tmp/pgdist.pgdist_test20130430.seq").chomp
+         result.should == "1"
+
+         result = File.read("/tmp/pgdist.pgdist_test20130430").split(/\n/).map{|l|l.split(/\t/)}
+         result.shift.should == ["1", "100", "2013-04-30 01:23:45", "{\"id\":\"100\",\"created_at\":\"2013-04-30T01:23:45Z\",\"text\":\"message1\"}"]
+         result.size.should == 0
+
+         result = File.read("/tmp/pgdist.pgdist_test20130501.seq").chomp
+         result.should == "2"
+
+         result = File.read("/tmp/pgdist.pgdist_test20130501").split(/\n/).map{|l|l.split(/\t/)}
+
+         result.shift.should == ["1", "101", "2013-05-01T01:23:45Z", "dummy"]
+         result.shift.should == ["2", "102", "2013-05-01 01:23:46", "{\"id\":\"102\",\"created_at\":\"2013-05-01T01:23:46Z\",\"text\":\"message3\"}"]
+         result.size.should == 0
        end
-       @driver.run
-       result = @connection.exec("select * from pgdist.pgdist_test20130430")
-
-       record = result[0]
-       record["seq"].should == "1"
-       record["id"].should == "100"
-       record["created_at"].should == "2013-04-30 01:23:45"
-       json = record["value"]
-       hash = JSON.parse(json)
-       hash["id"].should == "100"
-       hash["created_at"].should == "2013-04-30T01:23:45Z"
-       hash["text"].should == "message1"
-
-       result = @connection.exec("select * from pgdist.pgdist_test20130501")
-
-       record = result[0]
-       record["seq"].should == "1"
-       record["id"].should == "101"
-       record["created_at"].should == "2013-05-01 01:23:45"
-       json = record["value"]
-       hash = JSON.parse(json)
-       hash["id"].should == "101"
-       hash["created_at"].should == "2013-05-01T01:23:45Z"
-       hash["text"].should == "message2"
-
-       @connection.close
      end
    end
-   end

+   describe "#write_pg_result(output_stream, file_fields, pg_result)" do
+     before :all do
+       @pg_result = @connection.exec(%[
+         (select 1 as seq, '100' as id, E'test\\ndata1' as value)
+         union
+         (select 2 as seq, '101' as id, E'test\\ndata2' as value)
+       ])
+     end
+
+     after :all do
+     end
+
+     before :each do
+       pipe = IO.pipe
+       @output_reader = pipe[0]
+       @output_writer = pipe[1]
+     end
+
+     after :each do
+       @output_reader.close unless @output_reader.closed?
+       @output_writer.close unless @output_writer.closed?
+     end
+
+     context "when file_format = json" do
+       before :each do
+         json_conf = @default_conf + %[
+           file_format json
+           file_record_filter {|r|{"seq"=>r["seq"],"id"=>r["id"],"text"=>r["value"]}}
+         ]
+         @json_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(json_conf)
+       end
+
+       it "should output json data" do
+         @json_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+         @output_writer.close
+         result = @output_reader.gets.chomp
+         result = JSON.parse(result)
+         result.should == {"seq"=>"1", "id"=>"100", "text"=>"test\ndata1"}
+         result = @output_reader.gets.chomp
+         result = JSON.parse(result)
+         result.should == {"seq"=>"2", "id"=>"101", "text"=>"test\ndata2"}
+       end
+     end
+
+     context "when file_format = ltsv" do
+       before :each do
+         ltsv_conf = @default_conf + %[
+           file_format ltsv
+           file_record_filter {|f,r|[["seq","id","text"],[r[0],r[1],r[2]]]}
+         ]
+         @ltsv_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(ltsv_conf)
+       end
+
+       it "should output ltsv data" do
+         @ltsv_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+         @output_writer.close
+         result = @output_reader.gets.chomp
+         result = Hash[result.split(/\t/).map{|p|p.split(/:/, 2)}]
+         result.should == {"seq"=>"1", "id"=>"100", "text"=>"test\\ndata1"}
+         result = @output_reader.gets.chomp
+         result = Hash[result.split(/\t/).map{|p|p.split(/:/, 2)}]
+         result.should == {"seq"=>"2", "id"=>"101", "text"=>"test\\ndata2"}
+         @output_reader.gets.should be_nil
+       end
+     end
+
+     context "when file_format = msgpack" do
+       before :each do
+         msgpack_conf = @default_conf + %[
+           file_format msgpack
+           file_record_filter {|r|{"seq"=>r["seq"],"id"=>r["id"],"text"=>r["value"]}}
+         ]
+         @msgpack_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(msgpack_conf)
+       end
+
+       it "should output message_pack data" do
+         @msgpack_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+         @output_writer.close
+         unpacker = MessagePack::Unpacker.new(@output_reader)
+         result = []
+         begin
+           unpacker.each do |record|
+             result.push record
+           end
+         rescue EOFError => e
+         end
+         result.shift.should == {"seq"=>"1", "id"=>"100", "text"=>"test\ndata1"}
+         result.shift.should == {"seq"=>"2", "id"=>"101", "text"=>"test\ndata2"}
+       end
+     end
+
+     context "when file_format = raw" do
+       before :each do
+         tsv_conf = @default_conf + "file_format raw\n"
+         @tsv_driver = Fluent::Test::BufferedOutputTestDriver.new(Fluent::PgdistOutput, "pgdist.test").configure(tsv_conf)
+       end
+       it "should output tsv data" do
+         @tsv_driver.instance.write_pg_result(@output_writer, ["seq", "id", "value"], @pg_result)
+         @output_writer.close
+         result = @output_reader.gets.chomp
+         result = result.split(/\t/)
+         result.should == ["1", "100", "test\\ndata1"]
+         result = @output_reader.gets.chomp
+         result = result.split(/\t/)
+         result.should == ["2", "101", "test\\ndata2"]
+         @output_reader.gets.should be_nil
+       end
+     end
+   end
+ end
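The `#db_escape` examples in the spec above pin down the escaping contract: `nil` becomes the PostgreSQL COPY null marker `\N`, and backslash plus the control characters `\a \b \n \r \t` become two-character escape sequences, so each LTSV/TSV record stays on one line. A compact `gsub`-based equivalent of that contract (a sketch, not the plugin's match-loop implementation):

    ESCAPES = {
      "\\" => "\\\\", "\a" => "\\a", "\b" => "\\b",
      "\n" => "\\n",  "\r" => "\\r", "\t" => "\\t",
    }

    def db_escape(str)
      return "\\N" if str.nil?
      str.gsub(/[\\\a\b\n\r\t]/) { |c| ESCAPES[c] }
    end

    p db_escape(nil)           # => "\\N"
    p db_escape("a\tb\nc\\d")  # => "a\\tb\\nc\\\\d"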
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-pgdist
  version: !ruby/object:Gem::Version
-   version: 0.1.1
+   version: 0.2.0
  prerelease:
  platform: ruby
  authors:
@@ -9,40 +9,40 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-05-01 00:00:00.000000000 Z
+ date: 2013-05-02 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: fluentd
    requirement: !ruby/object:Gem::Requirement
      none: false
      requirements:
-     - - ! '>='
+     - - ~>
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 0.10.33
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      none: false
      requirements:
-     - - ! '>='
+     - - ~>
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 0.10.33
  - !ruby/object:Gem::Dependency
    name: pg
    requirement: !ruby/object:Gem::Requirement
      none: false
      requirements:
-     - - ! '>='
+     - - ~>
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 0.15.1
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      none: false
      requirements:
-     - - ! '>='
+     - - ~>
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 0.15.1
  - !ruby/object:Gem::Dependency
    name: rspec
    requirement: !ruby/object:Gem::Requirement
@@ -237,7 +237,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
      version: '0'
    segments:
    - 0
-   hash: 4199809916756292803
+   hash: -910513567270313927
  required_rubygems_version: !ruby/object:Gem::Requirement
    none: false
    requirements: