fluent-plugin-redshift 0.0.1

data/.gitignore ADDED
@@ -0,0 +1,19 @@
+ *.gem
+ *.rbc
+ *.swp
+ .bundle
+ .config
+ .yardoc
+ Gemfile.lock
+ InstalledFiles
+ _yardoc
+ coverage
+ doc/
+ lib/bundler/man
+ pkg
+ rdoc
+ spec/reports
+ test/tmp
+ test/version_tmp
+ tmp
+ vendor/
data/Gemfile ADDED
@@ -0,0 +1,3 @@
+ source 'https://rubygems.org'
+
+ gemspec
data/README.md ADDED
@@ -0,0 +1,131 @@
+ Amazon Redshift output plugin for Fluentd
+ ========
+
+ ## Overview
+
+ Amazon Redshift output plugin uploads event logs to an Amazon Redshift cluster. Supported data formats are csv, tsv and json. An S3 bucket and a Redshift cluster are required to use this plugin.
+
+ ## Installation
+
+     gem install fluent-plugin-redshift
+
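+ Depending on how fluentd is installed, you may need to use the `fluent-gem` command bundled with fluentd instead of the system `gem`:
+
+     fluent-gem install fluent-plugin-redshift
+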
+ ## Configuration
+
+ Format:
+
+     <match my.tag>
+       type redshift
+
+       # s3 (for copying data to redshift)
+       aws_key_id YOUR_AWS_KEY_ID
+       aws_sec_key YOUR_AWS_SECRET_KEY
+       s3_bucket YOUR_S3_BUCKET
+       s3_endpoint YOUR_S3_BUCKET_END_POINT
+       path YOUR_S3_PATH
+       timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
+
+       # redshift
+       redshift_host YOUR_AMAZON_REDSHIFT_CLUSTER_END_POINT
+       redshift_port YOUR_AMAZON_REDSHIFT_CLUSTER_PORT
+       redshift_dbname YOUR_AMAZON_REDSHIFT_CLUSTER_DATABASE_NAME
+       redshift_user YOUR_AMAZON_REDSHIFT_CLUSTER_USER_NAME
+       redshift_password YOUR_AMAZON_REDSHIFT_CLUSTER_PASSWORD
+       redshift_tablename YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_TABLE_NAME
+       file_type [tsv|csv|json]
+
+       # buffer
+       buffer_type file
+       buffer_path /var/log/fluent/redshift
+       flush_interval 15m
+       buffer_chunk_limit 1g
+     </match>
+
+ Example (watch and upload json formatted apache log):
+
+     <source>
+       type tail
+       path redshift_test.json
+       pos_file redshift_test_json.pos
+       tag redshift.json
+       format /^(?<log>.*)$/
+     </source>
+
+     <match redshift.json>
+       type redshift
+
+       # s3 (for copying data to redshift)
+       aws_key_id YOUR_AWS_KEY_ID
+       aws_sec_key YOUR_AWS_SECRET_KEY
+       s3_bucket hapyrus-example
+       s3_endpoint s3.amazonaws.com
+       path apache_json_log
+       timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
+
+       # redshift
+       redshift_host xxx-yyy-zzz.xxxxxxxxxx.us-east-1.redshift.amazonaws.com
+       redshift_port 5439
+       redshift_dbname fluent-redshift-test
+       redshift_user fluent
+       redshift_password fluent-password
+       redshift_tablename apache_log
+       file_type json
+
+       # buffer
+       buffer_type file
+       buffer_path /var/log/fluent/redshift
+       flush_interval 15m
+       buffer_chunk_limit 1g
+     </match>
+
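+ With this configuration, each line of `redshift_test.json` is captured whole into the `log` field, so an emitted record looks like the following (illustrative values; the JSON keys must match column names of the `apache_log` table):
+
+     {"log" : "{\"host\" : \"127.0.0.1\", \"path\" : \"/index.html\", \"code\" : 200}"}
+
+ At flush time the plugin parses the JSON held in `log` and writes one row per record, filling only the columns whose names appear as keys.
+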
+ + `type` (required) : The value must be `redshift`.
+
+ + `aws_key_id` (required) : AWS access key id to access the s3 bucket.
+
+ + `aws_sec_key` (required) : AWS secret access key to access the s3 bucket.
+
+ + `s3_bucket` (required) : s3 bucket name. The S3 bucket must be in the same region as your Redshift cluster.
+
+ + `s3_endpoint` : s3 endpoint.
+
+ + `path` (required) : s3 path to input.
+
+ + `timestamp_key_format` : The format of the object keys. It can include date-format directives.
+
+   - Default parameter is "year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M"
+   - For example, with the example configuration above the s3 paths look like the following.
+ <pre>
+ hapyrus-example/apache_json_log/year=2013/month=03/day=05/hour=12/20130305-1215_00.gz
+ hapyrus-example/apache_json_log/year=2013/month=03/day=05/hour=12/20130305-1230_00.gz
+ </pre>
+
+ + `redshift_host` (required) : the endpoint (or hostname) of your Amazon Redshift cluster.
+
+ + `redshift_port` (required) : port number.
+
+ + `redshift_dbname` (required) : database name.
+
+ + `redshift_user` (required) : user name.
+
+ + `redshift_password` (required) : password for the user name.
+
+ + `redshift_tablename` (required) : table name to store data.
+
+ + `file_type` : file format of the source data. `csv`, `tsv` or `json` are available.
+
+ + `delimiter` : delimiter of the source data. This option will be ignored if `file_type` is specified.
+
+ + `buffer_type` : buffer type.
+
+ + `buffer_path` : path prefix of the files to buffer logs.
+
+ + `flush_interval` : flush interval.
+
+ + `buffer_chunk_limit` : limit buffer size to chunk.
+
+ + `utc` : use UTC time zone when formatting `timestamp_key_format` (local time is used otherwise).
+
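+ Buffered records are gzipped, uploaded to the configured S3 path, and then loaded into Redshift with a COPY statement built from the options above, roughly of the following form (a sketch of what the plugin issues; upper-case values are the placeholders from the configuration):
+
+     copy YOUR_TABLE from 's3://YOUR_S3_BUCKET/YOUR_S3_PATH/year=.../....gz'
+     CREDENTIALS 'aws_access_key_id=YOUR_AWS_KEY_ID;aws_secret_access_key=YOUR_AWS_SECRET_KEY'
+     delimiter 'DELIMITER' REMOVEQUOTES GZIP;
+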
+ ## License
+
+ Copyright (c) 2013 [Hapyrus Inc](http://hapyrus.com)
+
+ [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
data/Rakefile ADDED
@@ -0,0 +1,16 @@
+ require "bundler"
+ Bundler::GemHelper.install_tasks
+ require 'rake/testtask'
+
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'lib' << 'test'
+   test.test_files = FileList['test/plugin/*.rb']
+   test.verbose = true
+ end
+
+ task :coverage do |t|
+   ENV['COVERAGE'] = '1'
+   Rake::Task["test"].invoke
+ end
+
+ task :default => [:build]
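+ # `rake test` runs the unit tests, `rake coverage` runs them with SimpleCov
+ # enabled (COVERAGE=1), and plain `rake` builds the gem via the default task.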
data/VERSION ADDED
@@ -0,0 +1 @@
+ 0.0.1
data/fluent-plugin-redshift.gemspec ADDED
@@ -0,0 +1,25 @@
+ # -*- encoding: utf-8 -*-
+ $:.push File.expand_path('../lib', __FILE__)
+
+ Gem::Specification.new do |gem|
+   gem.name = "fluent-plugin-redshift"
+   gem.version = File.read("VERSION").strip
+   gem.authors = ["Masashi Miyazaki"]
+   gem.email = ["mmasashi@gmail.com"]
+   gem.description = %q{Amazon Redshift output plugin for Fluentd}
+   gem.summary = gem.description
+   gem.homepage = "https://github.com/hapyrus/fluent-plugin-redshift"
+   gem.has_rdoc = false
+
+   gem.files = `git ls-files`.split($/)
+   gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
+   gem.require_paths = ["lib"]
+
+   gem.add_dependency "fluentd", "~> 0.10.0"
+   gem.add_dependency "aws-sdk", ">= 1.6.3"
+   gem.add_dependency "pg", "~> 0.14.0"
+   gem.add_development_dependency "rake"
+   gem.add_development_dependency "simplecov", ">= 0.5.4"
+   gem.add_development_dependency "flexmock", ">= 1.3.1"
+ end
data/lib/fluent/plugin/out_redshift.rb ADDED
@@ -0,0 +1,230 @@
+ module Fluent
+
+
+ class RedshiftOutput < BufferedOutput
+   Fluent::Plugin.register_output('redshift', self)
+
+   def initialize
+     super
+     require 'aws-sdk'
+     require 'zlib'
+     require 'time'
+     require 'tempfile'
+     require 'pg'
+     require 'json'
+     require 'csv'
+   end
+
+   config_param :record_log_tag, :string, :default => 'log'
+   # s3
+   config_param :aws_key_id, :string
+   config_param :aws_sec_key, :string
+   config_param :s3_bucket, :string
+   config_param :s3_endpoint, :string, :default => nil
+   config_param :path, :string, :default => ""
+   config_param :timestamp_key_format, :string, :default => 'year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M'
+   config_param :utc, :bool, :default => false
+   # redshift
+   config_param :redshift_host, :string
+   config_param :redshift_port, :integer, :default => 5439
+   config_param :redshift_dbname, :string
+   config_param :redshift_user, :string
+   config_param :redshift_password, :string
+   config_param :redshift_tablename, :string
+   # file format
+   config_param :file_type, :string, :default => nil # json, tsv, csv
+   config_param :delimiter, :string, :default => nil
+
+   def configure(conf)
+     super
+     @path = "#{@path}/" if /.+[^\/]$/ =~ @path
+     @path = "" if @path == "/"
+     @utc = true if conf['utc']
+     @db_conf = {
+       host:@redshift_host,
+       port:@redshift_port,
+       dbname:@redshift_dbname,
+       user:@redshift_user,
+       password:@redshift_password
+     }
+     @delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
+     $log.debug "redshift file_type:#{@file_type} delimiter:'#{@delimiter}'"
+     @copy_sql_template = "copy #{@redshift_tablename} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' REMOVEQUOTES GZIP;"
+   end
+
+   def start
+     super
+     # init s3 conf
+     options = {
+       :access_key_id => @aws_key_id,
+       :secret_access_key => @aws_sec_key
+     }
+     options[:s3_endpoint] = @s3_endpoint if @s3_endpoint
+     @s3 = AWS::S3.new(options)
+     @bucket = @s3.buckets[@s3_bucket]
+   end
+
+
67
+ def format(tag, time, record)
68
+ (json?) ? record.to_msgpack : "#{record[@record_log_tag]}\n"
69
+ end
70
+
71
+ def write(chunk)
72
+ # create a gz file
73
+ tmp = Tempfile.new("s3-")
74
+ tmp = (json?) ? create_gz_file_from_json(tmp, chunk, @delimiter)
75
+ : create_gz_file_from_msgpack(tmp, chunk)
76
+
77
+ # no data -> skip
78
+ unless tmp
79
+ $log.debug "received no valid data. "
80
+ return
81
+ end
82
+
83
+ # create a file path with time format
84
+ s3path = create_s3path(@bucket, @path)
85
+
86
+ # upload gz to s3
87
+ @bucket.objects[s3path].write(Pathname.new(tmp.path),
88
+ :acl => :bucket_owner_full_control)
89
+ # copy gz on s3 to redshift
90
+ s3_uri = "s3://#{@s3_bucket}/#{s3path}"
91
+ sql = @copy_sql_template % [s3_uri, @aws_sec_key]
92
+ $log.debug "start copying. s3_uri=#{s3_uri}"
93
+ conn = nil
94
+ begin
95
+ conn = PG.connect(@db_conf)
96
+ conn.exec(sql)
97
+ $log.info "completed copying to redshift. s3_uri=#{s3_uri}"
98
+ rescue PG::Error => e
99
+ $log.error "failed to copy data into redshift. sql=#{s3_uri}", :error=>e.to_s
100
+ raise e if e.result.nil? # retry if connection errors
101
+ ensure
102
+ conn.close rescue nil if conn
103
+ end
104
+ end
105
+
106
+ private
107
+ def json?
108
+ @file_type == 'json'
109
+ end
110
+
111
+ def create_gz_file_from_msgpack(dst_file, chunk)
112
+ gzw = nil
113
+ begin
114
+ gzw = Zlib::GzipWriter.new(dst_file)
115
+ chunk.write_to(gzw)
116
+ ensure
117
+ gzw.close rescue nil if gzw
118
+ end
119
+ dst_file
120
+ end
121
+
122
+ def create_gz_file_from_json(dst_file, chunk, delimiter)
123
+ # fetch the table definition from redshift
124
+ redshift_table_columns = fetch_table_columns
125
+ if redshift_table_columns == nil
126
+ raise "failed to fetch the redshift table definition."
127
+ elsif redshift_table_columns.empty?
128
+ $log.warn "no table on redshift. table_name=#{@redshift_tablename}"
129
+ return nil
130
+ end
131
+
132
+ # convert json to tsv format text
133
+ table_texts = ""
134
+ chunk.msgpack_each do |record|
135
+ begin
136
+ table_texts << json_to_table_text(redshift_table_columns, record[@record_log_tag], delimiter)
137
+ rescue => e
138
+ $log.error "failed to create table text from json. text=(#{record[@record_log_tag]})", :error=>$!.to_s
139
+ $log.error_backtrace
140
+ end
141
+ end
142
+ return nil if table_texts.empty?
143
+
144
+ # create gz
145
+ gzw = nil
146
+ begin
147
+ gzw = Zlib::GzipWriter.new(dst_file)
148
+ gzw.write(table_texts)
149
+ ensure
150
+ gzw.close rescue nil if gzw
151
+ end
152
+ dst_file
153
+ end
154
+
155
+ def determine_delimiter(file_type)
156
+ case file_type
157
+ when 'json', 'tsv'
158
+ "\t"
159
+ when "csv"
160
+ ','
161
+ else
162
+ raise Fluent::ConfigError, "Invalid file_type:#{file_type}."
163
+ end
164
+ end
165
+
166
+ def fetch_table_columns
167
+ fetch_columns_sql = "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{@redshift_tablename}' order by ordinal_position;"
168
+ conn = PG.connect(@db_conf)
169
+ begin
170
+ columns = nil
171
+ conn.exec(fetch_columns_sql) do |result|
172
+ columns = result.collect{|row| row['column_name']}
173
+ end
174
+ columns
175
+ ensure
176
+ conn.close rescue nil
177
+ end
178
+ end
179
+
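+   # Converts one JSON record into a delimited line matching the table layout.
+   # Illustrative example: with columns ["key_a", "key_b"], json_text
+   # '{"key_a":"val_a"}' and a tab delimiter, the result is "val_a\t\n"
+   # (columns without a matching key are left empty).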
+   def json_to_table_text(redshift_table_columns, json_text, delimiter)
+     return "" if json_text.nil? or json_text.empty?
+
+     # parse json text
+     json_obj = nil
+     begin
+       json_obj = JSON.parse(json_text)
+     rescue => e
+       $log.warn "failed to parse json. ", :error=>e.to_s
+       return ""
+     end
+     return "" unless json_obj
+
+     # extract values from json
+     val_list = redshift_table_columns.collect do |cn|
+       val = json_obj[cn]
+       val = nil unless val and not val.to_s.empty?
+       val = JSON.generate(val) if val.kind_of?(Hash) or val.kind_of?(Array)
+       val.to_s unless val.nil?
+     end
+     if val_list.all?{|v| v.nil? or v.empty?}
+       $log.warn "no data match for table columns on redshift. json_text=#{json_text} table_columns=#{redshift_table_columns}"
+       return ""
+     end
+
+     # generate tsv text
+     begin
+       CSV.generate(:col_sep=>delimiter, :quote_char => '"') do |row|
+         row << val_list # include new line
+       end
+     rescue => e
+       $log.debug "failed to generate csv val_list:#{val_list} delimiter:(#{delimiter})"
+       raise e
+     end
+   end
+
+ def create_s3path(bucket, path)
217
+ timestamp_key = (@utc) ? Time.now.utc.strftime(@timestamp_key_format) : Time.now.strftime(@timestamp_key_format)
218
+ i = 0
219
+ begin
220
+ suffix = "_#{'%02d' % i}"
221
+ s3path = "#{path}#{timestamp_key}#{suffix}.gz"
222
+ i += 1
223
+ end while bucket.objects[s3path].exists?
224
+ s3path
225
+ end
226
+
227
+ end
228
+
229
+
230
+ end
data/test/plugin/test_out_redshift.rb ADDED
@@ -0,0 +1,395 @@
+ require 'test_helper'
+
+ require 'fluent/test'
+ require 'fluent/plugin/out_redshift'
+ require 'flexmock/test_unit'
+ require 'zlib'
+
+
+ class RedshiftOutputTest < Test::Unit::TestCase
+   def setup
+     require 'aws-sdk'
+     require 'pg'
+     require 'csv'
+     Fluent::Test.setup
+   end
+
+   CONFIG_BASE = %[
+     aws_key_id test_key_id
+     aws_sec_key test_sec_key
+     s3_bucket test_bucket
+     path log
+     redshift_host test_host
+     redshift_dbname test_db
+     redshift_user test_user
+     redshift_password test_password
+     redshift_tablename test_table
+     buffer_type memory
+     utc
+   ]
+   CONFIG_CSV = %[
+     #{CONFIG_BASE}
+     file_type csv
+   ]
+   CONFIG_TSV = %[
+     #{CONFIG_BASE}
+     file_type tsv
+   ]
+   CONFIG_JSON = %[
+     #{CONFIG_BASE}
+     file_type json
+   ]
+   CONFIG_PIPE_DELIMITER = %[
+     #{CONFIG_BASE}
+     delimiter |
+   ]
+   CONFIG_PIPE_DELIMITER_WITH_NAME = %[
+     #{CONFIG_BASE}
+     file_type pipe
+     delimiter |
+   ]
+   CONFIG = CONFIG_CSV
+
+   RECORD_CSV_A = {"log" => %[val_a,val_b,val_c,val_d]}
+   RECORD_CSV_B = {"log" => %[val_e,val_f,val_g,val_h]}
+   RECORD_TSV_A = {"log" => %[val_a\tval_b\tval_c\tval_d]}
+   RECORD_TSV_B = {"log" => %[val_e\tval_f\tval_g\tval_h]}
+   RECORD_JSON_A = {"log" => %[{"key_a" : "val_a", "key_b" : "val_b"}]}
+   RECORD_JSON_B = {"log" => %[{"key_c" : "val_c", "key_d" : "val_d"}]}
+   DEFAULT_TIME = Time.parse("2013-03-06 12:15:02 UTC").to_i
+
+   def create_driver(conf = CONFIG, tag='test.input')
+     Fluent::Test::BufferedOutputTestDriver.new(Fluent::RedshiftOutput, tag).configure(conf)
+   end
+
+   def create_driver_no_write(conf = CONFIG, tag='test.input')
+     Fluent::Test::BufferedOutputTestDriver.new(Fluent::RedshiftOutput, tag) do
+       def write(chunk)
+         chunk.read
+       end
+     end.configure(conf)
+   end
+
+   def test_configure
+     assert_raise(Fluent::ConfigError) {
+       d = create_driver('')
+     }
+     assert_raise(Fluent::ConfigError) {
+       d = create_driver(CONFIG_BASE)
+     }
+     d = create_driver(CONFIG_CSV)
+     assert_equal "test_key_id", d.instance.aws_key_id
+     assert_equal "test_sec_key", d.instance.aws_sec_key
+     assert_equal "test_bucket", d.instance.s3_bucket
+     assert_equal "log/", d.instance.path
+     assert_equal "test_host", d.instance.redshift_host
+     assert_equal 5439, d.instance.redshift_port
+     assert_equal "test_db", d.instance.redshift_dbname
+     assert_equal "test_user", d.instance.redshift_user
+     assert_equal "test_password", d.instance.redshift_password
+     assert_equal "test_table", d.instance.redshift_tablename
+     assert_equal "csv", d.instance.file_type
+     assert_equal ",", d.instance.delimiter
+     assert_equal true, d.instance.utc
+   end
+   def test_configure_localtime
+     d = create_driver(CONFIG_CSV.gsub(/ *utc */, ''))
+     assert_equal false, d.instance.utc
+   end
+   def test_configure_no_path
+     d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, ''))
+     assert_equal "", d.instance.path
+   end
+   def test_configure_root_path
+     d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path /'))
+     assert_equal "", d.instance.path
+   end
+   def test_configure_path_with_slash
+     d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path log/'))
+     assert_equal "log/", d.instance.path
+   end
+   def test_configure_tsv
+     d1 = create_driver(CONFIG_TSV)
+     assert_equal "tsv", d1.instance.file_type
+     assert_equal "\t", d1.instance.delimiter
+   end
+   def test_configure_json
+     d2 = create_driver(CONFIG_JSON)
+     assert_equal "json", d2.instance.file_type
+     assert_equal "\t", d2.instance.delimiter
+   end
+   def test_configure_original_file_type
+     d3 = create_driver(CONFIG_PIPE_DELIMITER)
+     assert_equal nil, d3.instance.file_type
+     assert_equal "|", d3.instance.delimiter
+
+     d4 = create_driver(CONFIG_PIPE_DELIMITER_WITH_NAME)
+     assert_equal "pipe", d4.instance.file_type
+     assert_equal "|", d4.instance.delimiter
+   end
+
+   def emit_csv(d)
+     d.emit(RECORD_CSV_A, DEFAULT_TIME)
+     d.emit(RECORD_CSV_B, DEFAULT_TIME)
+   end
+   def emit_tsv(d)
+     d.emit(RECORD_TSV_A, DEFAULT_TIME)
+     d.emit(RECORD_TSV_B, DEFAULT_TIME)
+   end
+   def emit_json(d)
+     d.emit(RECORD_JSON_A, DEFAULT_TIME)
+     d.emit(RECORD_JSON_B, DEFAULT_TIME)
+   end
+
+   def test_format_csv
+     d_csv = create_driver_no_write(CONFIG_CSV)
+     emit_csv(d_csv)
+     d_csv.expect_format RECORD_CSV_A['log'] + "\n"
+     d_csv.expect_format RECORD_CSV_B['log'] + "\n"
+     d_csv.run
+   end
+   def test_format_tsv
+     d_tsv = create_driver_no_write(CONFIG_TSV)
+     emit_tsv(d_tsv)
+     d_tsv.expect_format RECORD_TSV_A['log'] + "\n"
+     d_tsv.expect_format RECORD_TSV_B['log'] + "\n"
+     d_tsv.run
+   end
+   def test_format_json
+     d_json = create_driver_no_write(CONFIG_JSON)
+     emit_json(d_json)
+     d_json.expect_format RECORD_JSON_A.to_msgpack
+     d_json.expect_format RECORD_JSON_B.to_msgpack
+     d_json.run
+   end
+
+   class PGConnectionMock
+     def initialize(return_keys=['key_a', 'key_b', 'key_c', 'key_d', 'key_e', 'key_f', 'key_g', 'key_h'])
+       @return_keys = return_keys
+     end
+     def exec(sql, &block)
+       if block_given? and /^select column_name from/ =~ sql
+         yield @return_keys.collect{|key| {'column_name' => key}}
+       end
+     end
+     def close
+     end
+   end
+
+   def setup_pg_mock
+     # create mock of PG
+     def PG.connect(dbinfo)
+       return PGConnectionMock.new
+     end
+   end
+
+   def setup_s3_mock(expected_data)
+     current_time = Time.now
+
+     # create mock of s3 object
+     s3obj = flexmock(AWS::S3::S3Object)
+     s3obj.should_receive(:exists?).with_any_args.and_return { false }
+     s3obj.should_receive(:write).with(
+       # pathname
+       on { |pathname|
+         data = nil
+         pathname.open { |f|
+           gz = Zlib::GzipReader.new(f)
+           data = gz.read
+           gz.close
+         }
+         assert_equal expected_data, data
+       },
+       :acl => :bucket_owner_full_control
+     ).and_return { true }
+
+     # create mock of s3 object collection
+     s3obj_col = flexmock(AWS::S3::ObjectCollection)
+     s3obj_col.should_receive(:[]).with(
+       on { |key|
+         expected_key = current_time.utc.strftime("log/year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M_00.gz")
+         key == expected_key
+       }).
+       and_return {
+         s3obj
+       }
+
+     # create mock of s3 bucket
+     flexmock(AWS::S3::Bucket).new_instances do |bucket|
+       bucket.should_receive(:objects).with_any_args.
+         and_return {
+           s3obj_col
+         }
+     end
+   end
+
+   def setup_mocks(expected_data)
+     setup_pg_mock
+     setup_s3_mock(expected_data)
+   end
+
+   def test_write_with_csv
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+     d_csv = create_driver
+     emit_csv(d_csv)
+     d_csv.run
+   end
+
+   def test_write_with_json
+     setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     d_json.run
+   end
+
+   def test_write_with_json_hash_value
+     setup_mocks("val_a\t\"{\"\"foo\"\":\"\"var\"\"}\"\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : {"foo" : "var"}}]} , DEFAULT_TIME)
+     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+     d_json.run
+   end
+
+   def test_write_with_json_array_value
+     setup_mocks("val_a\t\"[\"\"foo\"\",\"\"var\"\"]\"\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ["foo", "var"]}]} , DEFAULT_TIME)
+     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+     d_json.run
+   end
+
+   def test_write_with_json_no_data
+     setup_mocks("")
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit("", DEFAULT_TIME)
+     d_json.emit("", DEFAULT_TIME)
+     d_json.run
+   end
+
+   def test_write_with_json_invalid_one_line
+     setup_mocks(%[\t\tval_c\tval_d\t\t\t\t\n])
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[}}]}, DEFAULT_TIME)
+     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+     d_json.run
+   end
+
+   def test_write_with_json_no_available_data
+     setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n])
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit(RECORD_JSON_A, DEFAULT_TIME)
+     d_json.emit({"log" => %[{"key_o" : "val_o", "key_p" : "val_p"}]}, DEFAULT_TIME)
+     d_json.run
+   end
+
+   def test_write_redshift_connection_error
+     def PG.connect(dbinfo)
+       return Class.new do
+         def initialize(return_keys=[]); end
+         def exec(sql)
+           raise PG::Error, "redshift connection error"
+         end
+         def close; end
+       end.new
+     end
+     setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+
+     d_csv = create_driver
+     emit_csv(d_csv)
+     assert_raise(PG::Error) {
+       d_csv.run
+     }
+   end
+
+ def test_write_redshift_logic_error
304
+ PG::Error.module_eval { attr_accessor :result}
305
+ def PG.connect(dbinfo)
306
+ return Class.new do
307
+ def initialize(return_keys=[]); end
308
+ def exec(sql)
309
+ error = PG::Error.new("redshift logic error")
310
+ error.result = "logic error"
311
+ raise error
312
+ end
313
+ def close; end
314
+ end.new
315
+ end
316
+ setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
317
+
318
+ d_csv = create_driver
319
+ emit_csv(d_csv)
320
+ assert_nothing_raised {
321
+ d_csv.run
322
+ }
323
+ end
324
+
325
+ def test_write_with_json_redshift_connection_error
326
+ def PG.connect(dbinfo)
327
+ return Class.new do
328
+ def initialize(return_keys=[]); end
329
+ def exec(sql, &block)
330
+ error = PG::Error.new("redshift connection error")
331
+ raise error
332
+ end
333
+ def close; end
334
+ end.new
335
+ end
336
+ setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
337
+
338
+ d_json = create_driver(CONFIG_JSON)
339
+ emit_json(d_json)
340
+ assert_raise(PG::Error) {
341
+ d_json.run
342
+ }
343
+ end
344
+
345
+ def test_write_with_json_no_table_on_redshift
346
+ def PG.connect(dbinfo)
347
+ return Class.new do
348
+ def initialize(return_keys=[]); end
349
+ def exec(sql, &block)
350
+ yield [] if block_given?
351
+ end
352
+ def close; end
353
+ end.new
354
+ end
355
+ setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
356
+
357
+ d_json = create_driver(CONFIG_JSON)
358
+ emit_json(d_json)
359
+ assert_nothing_raised {
360
+ d_json.run
361
+ }
362
+ end
363
+
364
+ def test_write_with_json_failed_to_get_columns
365
+ def PG.connect(dbinfo)
366
+ return Class.new do
367
+ def initialize(return_keys=[]); end
368
+ def exec(sql, &block)
369
+ end
370
+ def close; end
371
+ end.new
372
+ end
373
+ setup_s3_mock("")
374
+
375
+ d_json = create_driver(CONFIG_JSON)
376
+ emit_json(d_json)
377
+ assert_raise(RuntimeError, "failed to fetch the redshift table definition.") {
378
+ d_json.run
379
+ }
380
+ end
381
+
382
+ def test_write_with_json_failed_to_generate_tsv
383
+ flexmock(CSV).should_receive(:generate).with_any_args.
384
+ and_return {
385
+ raise "failed to generate tsv."
386
+ }
387
+ setup_s3_mock("")
388
+
389
+ d_json = create_driver(CONFIG_JSON)
390
+ emit_json(d_json)
391
+ assert_nothing_raised {
392
+ d_json.run
393
+ }
394
+ end
395
+ end
data/test/test_helper.rb ADDED
@@ -0,0 +1,8 @@
+ if ENV['COVERAGE']
+   require 'simplecov'
+   SimpleCov.start do
+     add_filter 'test/'
+     add_filter 'pkg/'
+     add_filter 'vendor/'
+   end
+ end
metadata ADDED
@@ -0,0 +1,152 @@
+ --- !ruby/object:Gem::Specification
+ name: fluent-plugin-redshift
+ version: !ruby/object:Gem::Version
+   version: 0.0.1
+   prerelease:
+ platform: ruby
+ authors:
+ - Masashi Miyazaki
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2013-03-07 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: fluentd
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.10.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.10.0
+ - !ruby/object:Gem::Dependency
+   name: aws-sdk
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.6.3
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.6.3
+ - !ruby/object:Gem::Dependency
+   name: pg
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.14.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.14.0
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: simplecov
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 0.5.4
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 0.5.4
+ - !ruby/object:Gem::Dependency
+   name: flexmock
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.3.1
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.3.1
+ description: Amazon Redshift output plugin for Fluentd
+ email:
+ - mmasashi@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - README.md
+ - Rakefile
+ - VERSION
+ - fluent-plugin-redshift.gemspec
+ - lib/fluent/plugin/out_redshift.rb
+ - test/plugin/test_out_redshift.rb
+ - test/test_helper.rb
+ homepage: https://github.com/hapyrus/fluent-plugin-redshift
+ licenses: []
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 1.8.23
+ signing_key:
+ specification_version: 3
+ summary: Amazon Redshift output plugin for Fluentd
+ test_files:
+ - test/plugin/test_out_redshift.rb
+ - test/test_helper.rb