fluent-plugin-redshift 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,19 @@
+ *.gem
+ *.rbc
+ *.swp
+ .bundle
+ .config
+ .yardoc
+ Gemfile.lock
+ InstalledFiles
+ _yardoc
+ coverage
+ doc/
+ lib/bundler/man
+ pkg
+ rdoc
+ spec/reports
+ test/tmp
+ test/version_tmp
+ tmp
+ vendor/
data/Gemfile ADDED
@@ -0,0 +1,3 @@
+ source 'https://rubygems.org'
+
+ gemspec
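
Since the Gemfile defers entirely to the gemspec, setting up a development checkout is plain Bundler usage (illustrative, not part of the package):

    bundle install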
data/README.md ADDED
@@ -0,0 +1,131 @@
+ Amazon Redshift output plugin for Fluentd
+ ========
+
+ ## Overview
+
+ Amazon Redshift output plugin uploads event logs to an Amazon Redshift cluster. Supported data formats are csv, tsv and json. An S3 bucket and a Redshift cluster are required to use this plugin.
+
+ ## Installation
+
+     gem install fluent-plugin-redshift
+
+ ## Configuration
+
+ Format:
+
+     <match my.tag>
+       type redshift
+
+       # s3 (for copying data to redshift)
+       aws_key_id YOUR_AWS_KEY_ID
+       aws_sec_key YOUR_AWS_SECRET_KEY
+       s3_bucket YOUR_S3_BUCKET
+       s3_endpoint YOUR_S3_BUCKET_END_POINT
+       path YOUR_S3_PATH
+       timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
+
+       # redshift
+       redshift_host YOUR_AMAZON_REDSHIFT_CLUSTER_END_POINT
+       redshift_port YOUR_AMAZON_REDSHIFT_CLUSTER_PORT
+       redshift_dbname YOUR_AMAZON_REDSHIFT_CLUSTER_DATABASE_NAME
+       redshift_user YOUR_AMAZON_REDSHIFT_CLUSTER_USER_NAME
+       redshift_password YOUR_AMAZON_REDSHIFT_CLUSTER_PASSWORD
+       redshift_tablename YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_TABLE_NAME
+       file_type [tsv|csv|json]
+
+       # buffer
+       buffer_type file
+       buffer_path /var/log/fluent/redshift
+       flush_interval 15m
+       buffer_chunk_limit 1g
+     </match>
+
+ Example (watch and upload a JSON-formatted apache log):
+
+     <source>
+       type tail
+       path redshift_test.json
+       pos_file redshift_test_json.pos
+       tag redshift.json
+       format /^(?<log>.*)$/
+     </source>
+
+     <match redshift.json>
+       type redshift
+
+       # s3 (for copying data to redshift)
+       aws_key_id YOUR_AWS_KEY_ID
+       aws_sec_key YOUR_AWS_SECRET_KEY
+       s3_bucket hapyrus-example
+       s3_endpoint s3.amazonaws.com
+       path apache_json_log
+       timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
+
+       # redshift
+       redshift_host xxx-yyy-zzz.xxxxxxxxxx.us-east-1.redshift.amazonaws.com
+       redshift_port 5439
+       redshift_dbname fluent-redshift-test
+       redshift_user fluent
+       redshift_password fluent-password
+       redshift_tablename apache_log
+       file_type json
+
+       # buffer
+       buffer_type file
+       buffer_path /var/log/fluent/redshift
+       flush_interval 15m
+       buffer_chunk_limit 1g
+     </match>
+
+ + `type` (required) : The value must be `redshift`.
+
+ + `aws_key_id` (required) : AWS access key id to access the s3 bucket.
+
+ + `aws_sec_key` (required) : AWS secret access key to access the s3 bucket.
+
+ + `s3_bucket` (required) : s3 bucket name. The bucket must be in the same region as your Redshift cluster.
+
+ + `s3_endpoint` : s3 endpoint.
+
+ + `path` (required) : s3 path prefix for the uploaded files.
+
+ + `timestamp_key_format` : The format of the object keys. It can include date-format directives.
+
+   - The default is "year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M".
+   - For example, with the example configuration above, the s3 paths look like the following:
+ <pre>
+ hapyrus-example/apache_json_log/year=2013/month=03/day=05/hour=12/20130305-1215_00.gz
+ hapyrus-example/apache_json_log/year=2013/month=03/day=05/hour=12/20130305-1230_00.gz
+ </pre>
+
+ + `redshift_host` (required) : the endpoint (or hostname) of your Amazon Redshift cluster.
+
+ + `redshift_port` (required) : port number.
+
+ + `redshift_dbname` (required) : database name.
+
+ + `redshift_user` (required) : user name.
+
+ + `redshift_password` (required) : password for the user name.
+
+ + `redshift_tablename` (required) : table name to store data.
+
+ + `file_type` : file format of the source data. `csv`, `tsv` or `json` are available.
+
+ + `delimiter` : delimiter of the source data. If omitted, it is determined from `file_type` (tab for `tsv` and `json`, comma for `csv`); when specified, it takes precedence over the `file_type` default.
+
+ + `buffer_type` : buffer type.
+
+ + `buffer_path` : path prefix of the files to buffer logs.
+
+ + `flush_interval` : flush interval.
+
+ + `buffer_chunk_limit` : maximum size of each buffer chunk.
+
+ + `utc` : use UTC for the timestamps in `timestamp_key_format` (localtime is used by default).
+
+ ## License
+
+ Copyright (c) 2013 [Hapyrus Inc](http://hapyrus.com)
+
+ [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
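
To make the json path concrete: the plugin parses the JSON text stored under each record's `log` key, fetches the target table's column list from `INFORMATION_SCHEMA.COLUMNS`, and writes one delimiter-separated row per record; JSON keys with no matching column are dropped, and columns with no matching key are left empty (see `json_to_table_text` in `lib/fluent/plugin/out_redshift.rb`). A minimal sketch, assuming a hypothetical table whose columns are `key_a`, `key_b`, `key_c`:

    # incoming fluentd record (file_type json)
    {"log" => '{"key_a" : "val_a", "key_c" : "val_c", "key_x" : "dropped"}'}
    # row written to the gzipped upload file (tab-delimited; key_b left empty)
    "val_a\t\tval_c\n"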
data/Rakefile ADDED
@@ -0,0 +1,16 @@
+ require "bundler"
+ Bundler::GemHelper.install_tasks
+ require 'rake/testtask'
+
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'lib' << 'test'
+   test.test_files = FileList['test/plugin/*.rb']
+   test.verbose = true
+ end
+
+ task :coverage do |t|
+   ENV['COVERAGE'] = '1'
+   Rake::Task["test"].invoke
+ end
+
+ task :default => [:build]
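
These tasks give the usual development entry points; `build` comes from `Bundler::GemHelper.install_tasks` and is also the default task (commands illustrative):

    bundle exec rake test       # run test/plugin/*.rb
    bundle exec rake coverage   # same tests with COVERAGE=1 (see test/test_helper.rb)
    bundle exec rake            # default task: build the gem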
data/VERSION ADDED
@@ -0,0 +1 @@
+ 0.0.1
data/fluent-plugin-redshift.gemspec ADDED
@@ -0,0 +1,25 @@
+ # -*- encoding: utf-8 -*-
+ $:.push File.expand_path('../lib', __FILE__)
+
+ Gem::Specification.new do |gem|
+   gem.name = "fluent-plugin-redshift"
+   gem.version = File.read("VERSION").strip
+   gem.authors = ["Masashi Miyazaki"]
+   gem.email = ["mmasashi@gmail.com"]
+   gem.description = %q{Amazon Redshift output plugin for Fluentd}
+   gem.summary = gem.description
+   gem.homepage = "https://github.com/hapyrus/fluent-plugin-redshift"
+   gem.has_rdoc = false
+
+   gem.files = `git ls-files`.split($/)
+   gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
+   gem.require_paths = ["lib"]
+
+   gem.add_dependency "fluentd", "~> 0.10.0"
+   gem.add_dependency "aws-sdk", ">= 1.6.3"
+   gem.add_dependency "pg", "~> 0.14.0"
+   gem.add_development_dependency "rake"
+   gem.add_development_dependency "simplecov", ">= 0.5.4"
+   gem.add_development_dependency "flexmock", ">= 1.3.1"
+ end
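
Note that `gem.files` shells out to `git ls-files`, so the gem must be packaged from a git checkout; either of the standard invocations works (illustrative):

    bundle exec rake build
    gem build fluent-plugin-redshift.gemspec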
data/lib/fluent/plugin/out_redshift.rb ADDED
@@ -0,0 +1,230 @@
+ module Fluent
+
+
+ class RedshiftOutput < BufferedOutput
+   Fluent::Plugin.register_output('redshift', self)
+
+   def initialize
+     super
+     require 'aws-sdk'
+     require 'zlib'
+     require 'time'
+     require 'tempfile'
+     require 'pg'
+     require 'json'
+     require 'csv'
+   end
+
+   config_param :record_log_tag, :string, :default => 'log'
+   # s3
+   config_param :aws_key_id, :string
+   config_param :aws_sec_key, :string
+   config_param :s3_bucket, :string
+   config_param :s3_endpoint, :string, :default => nil
+   config_param :path, :string, :default => ""
+   config_param :timestamp_key_format, :string, :default => 'year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M'
+   config_param :utc, :bool, :default => false
+   # redshift
+   config_param :redshift_host, :string
+   config_param :redshift_port, :integer, :default => 5439
+   config_param :redshift_dbname, :string
+   config_param :redshift_user, :string
+   config_param :redshift_password, :string
+   config_param :redshift_tablename, :string
+   # file format
+   config_param :file_type, :string, :default => nil # json, tsv, csv
+   config_param :delimiter, :string, :default => nil
+
+   def configure(conf)
+     super
+     @path = "#{@path}/" if /.+[^\/]$/ =~ @path
+     @path = "" if @path == "/"
+     @utc = true if conf['utc']
+     @db_conf = {
+       host:@redshift_host,
+       port:@redshift_port,
+       dbname:@redshift_dbname,
+       user:@redshift_user,
+       password:@redshift_password
+     }
+     @delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
+     $log.debug "redshift file_type:#{@file_type} delimiter:'#{@delimiter}'"
+     @copy_sql_template = "copy #{@redshift_tablename} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' REMOVEQUOTES GZIP;"
+   end
+
+   def start
+     super
+     # init s3 conf
+     options = {
+       :access_key_id => @aws_key_id,
+       :secret_access_key => @aws_sec_key
+     }
+     options[:s3_endpoint] = @s3_endpoint if @s3_endpoint
+     @s3 = AWS::S3.new(options)
+     @bucket = @s3.buckets[@s3_bucket]
+   end
+
+   def format(tag, time, record)
+     (json?) ? record.to_msgpack : "#{record[@record_log_tag]}\n"
+   end
+
+   def write(chunk)
+     # create a gz file
+     tmp = Tempfile.new("s3-")
+     tmp = (json?) ? create_gz_file_from_json(tmp, chunk, @delimiter)
+                   : create_gz_file_from_msgpack(tmp, chunk)
+
+     # no data -> skip
+     unless tmp
+       $log.debug "received no valid data."
+       return
+     end
+
+     # create a file path with time format
+     s3path = create_s3path(@bucket, @path)
+
+     # upload gz to s3
+     @bucket.objects[s3path].write(Pathname.new(tmp.path),
+                                   :acl => :bucket_owner_full_control)
+     # copy gz on s3 to redshift
+     s3_uri = "s3://#{@s3_bucket}/#{s3path}"
+     sql = @copy_sql_template % [s3_uri, @aws_sec_key]
+     $log.debug "start copying. s3_uri=#{s3_uri}"
+     conn = nil
+     begin
+       conn = PG.connect(@db_conf)
+       conn.exec(sql)
+       $log.info "completed copying to redshift. s3_uri=#{s3_uri}"
+     rescue PG::Error => e
+       $log.error "failed to copy data into redshift. s3_uri=#{s3_uri}", :error=>e.to_s
+       raise e if e.result.nil? # retry if connection errors
+     ensure
+       conn.close rescue nil if conn
+     end
+   end
+
+   private
+   def json?
+     @file_type == 'json'
+   end
+
+   def create_gz_file_from_msgpack(dst_file, chunk)
+     gzw = nil
+     begin
+       gzw = Zlib::GzipWriter.new(dst_file)
+       chunk.write_to(gzw)
+     ensure
+       gzw.close rescue nil if gzw
+     end
+     dst_file
+   end
+
+   def create_gz_file_from_json(dst_file, chunk, delimiter)
+     # fetch the table definition from redshift
+     redshift_table_columns = fetch_table_columns
+     if redshift_table_columns == nil
+       raise "failed to fetch the redshift table definition."
+     elsif redshift_table_columns.empty?
+       $log.warn "no table on redshift. table_name=#{@redshift_tablename}"
+       return nil
+     end
+
+     # convert json to tsv format text
+     table_texts = ""
+     chunk.msgpack_each do |record|
+       begin
+         table_texts << json_to_table_text(redshift_table_columns, record[@record_log_tag], delimiter)
+       rescue => e
+         $log.error "failed to create table text from json. text=(#{record[@record_log_tag]})", :error=>e.to_s
+         $log.error_backtrace
+       end
+     end
+     return nil if table_texts.empty?
+
+     # create gz
+     gzw = nil
+     begin
+       gzw = Zlib::GzipWriter.new(dst_file)
+       gzw.write(table_texts)
+     ensure
+       gzw.close rescue nil if gzw
+     end
+     dst_file
+   end
+
+   def determine_delimiter(file_type)
+     case file_type
+     when 'json', 'tsv'
+       "\t"
+     when "csv"
+       ','
+     else
+       raise Fluent::ConfigError, "Invalid file_type:#{file_type}."
+     end
+   end
+
+   def fetch_table_columns
+     fetch_columns_sql = "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{@redshift_tablename}' order by ordinal_position;"
+     conn = PG.connect(@db_conf)
+     begin
+       columns = nil
+       conn.exec(fetch_columns_sql) do |result|
+         columns = result.collect{|row| row['column_name']}
+       end
+       columns
+     ensure
+       conn.close rescue nil
+     end
+   end
+
+   def json_to_table_text(redshift_table_columns, json_text, delimiter)
+     return "" if json_text.nil? or json_text.empty?
+
+     # parse json text
+     json_obj = nil
+     begin
+       json_obj = JSON.parse(json_text)
+     rescue => e
+       $log.warn "failed to parse json. ", :error=>e.to_s
+       return ""
+     end
+     return "" unless json_obj
+
+     # extract values from json
+     val_list = redshift_table_columns.collect do |cn|
+       val = json_obj[cn]
+       val = nil unless val and not val.to_s.empty?
+       val = JSON.generate(val) if val.kind_of?(Hash) or val.kind_of?(Array)
+       val.to_s unless val.nil?
+     end
+     if val_list.all?{|v| v.nil? or v.empty?}
+       $log.warn "no data match for table columns on redshift. json_text=#{json_text} table_columns=#{redshift_table_columns}"
+       return ""
+     end
+
+     # generate tsv text
+     begin
+       CSV.generate(:col_sep=>delimiter, :quote_char => '"') do |row|
+         row << val_list # includes a trailing newline
+       end
+     rescue => e
+       $log.debug "failed to generate csv val_list:#{val_list} delimiter:(#{delimiter})"
+       raise e
+     end
+   end
+
+   def create_s3path(bucket, path)
+     timestamp_key = (@utc) ? Time.now.utc.strftime(@timestamp_key_format) : Time.now.strftime(@timestamp_key_format)
+     i = 0
+     begin
+       suffix = "_#{'%02d' % i}"
+       s3path = "#{path}#{timestamp_key}#{suffix}.gz"
+       i += 1
+     end while bucket.objects[s3path].exists?
+     s3path
+   end
+
+ end
+
+
+ end
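
For reference, the COPY statement that `write` builds from `@copy_sql_template` takes the following shape, here with the README's example values and placeholder credentials (`\t` stands for the literal tab character; the real statement is a single line):

    copy apache_log from 's3://hapyrus-example/apache_json_log/year=2013/month=03/day=05/hour=12/20130305-1215_00.gz'
        CREDENTIALS 'aws_access_key_id=YOUR_AWS_KEY_ID;aws_secret_access_key=YOUR_AWS_SECRET_KEY'
        delimiter '\t' REMOVEQUOTES GZIP;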
data/test/plugin/test_out_redshift.rb ADDED
@@ -0,0 +1,395 @@
+ require 'test_helper'
+
+ require 'fluent/test'
+ require 'fluent/plugin/out_redshift'
+ require 'flexmock/test_unit'
+ require 'zlib'
+
+
+ class RedshiftOutputTest < Test::Unit::TestCase
+   def setup
+     require 'aws-sdk'
+     require 'pg'
+     require 'csv'
+     Fluent::Test.setup
+   end
+
+   CONFIG_BASE = %[
+     aws_key_id test_key_id
+     aws_sec_key test_sec_key
+     s3_bucket test_bucket
+     path log
+     redshift_host test_host
+     redshift_dbname test_db
+     redshift_user test_user
+     redshift_password test_password
+     redshift_tablename test_table
+     buffer_type memory
+     utc
+   ]
+   CONFIG_CSV = %[
+     #{CONFIG_BASE}
+     file_type csv
+   ]
+   CONFIG_TSV = %[
+     #{CONFIG_BASE}
+     file_type tsv
+   ]
+   CONFIG_JSON = %[
+     #{CONFIG_BASE}
+     file_type json
+   ]
+   CONFIG_PIPE_DELIMITER = %[
+     #{CONFIG_BASE}
+     delimiter |
+   ]
+   CONFIG_PIPE_DELIMITER_WITH_NAME = %[
+     #{CONFIG_BASE}
+     file_type pipe
+     delimiter |
+   ]
+   CONFIG = CONFIG_CSV
+
+   RECORD_CSV_A = {"log" => %[val_a,val_b,val_c,val_d]}
+   RECORD_CSV_B = {"log" => %[val_e,val_f,val_g,val_h]}
+   RECORD_TSV_A = {"log" => %[val_a\tval_b\tval_c\tval_d]}
+   RECORD_TSV_B = {"log" => %[val_e\tval_f\tval_g\tval_h]}
+   RECORD_JSON_A = {"log" => %[{"key_a" : "val_a", "key_b" : "val_b"}]}
+   RECORD_JSON_B = {"log" => %[{"key_c" : "val_c", "key_d" : "val_d"}]}
+   DEFAULT_TIME = Time.parse("2013-03-06 12:15:02 UTC").to_i
+
+   def create_driver(conf = CONFIG, tag='test.input')
+     Fluent::Test::BufferedOutputTestDriver.new(Fluent::RedshiftOutput, tag).configure(conf)
+   end
+
+   def create_driver_no_write(conf = CONFIG, tag='test.input')
+     Fluent::Test::BufferedOutputTestDriver.new(Fluent::RedshiftOutput, tag) do
+       def write(chunk)
+         chunk.read
+       end
+     end.configure(conf)
+   end
+
+   def test_configure
+     assert_raise(Fluent::ConfigError) {
+       d = create_driver('')
+     }
+     assert_raise(Fluent::ConfigError) {
+       d = create_driver(CONFIG_BASE)
+     }
+     d = create_driver(CONFIG_CSV)
+     assert_equal "test_key_id", d.instance.aws_key_id
+     assert_equal "test_sec_key", d.instance.aws_sec_key
+     assert_equal "test_bucket", d.instance.s3_bucket
+     assert_equal "log/", d.instance.path
+     assert_equal "test_host", d.instance.redshift_host
+     assert_equal 5439, d.instance.redshift_port
+     assert_equal "test_db", d.instance.redshift_dbname
+     assert_equal "test_user", d.instance.redshift_user
+     assert_equal "test_password", d.instance.redshift_password
+     assert_equal "test_table", d.instance.redshift_tablename
+     assert_equal "csv", d.instance.file_type
+     assert_equal ",", d.instance.delimiter
+     assert_equal true, d.instance.utc
+   end
+   def test_configure_localtime
+     d = create_driver(CONFIG_CSV.gsub(/ *utc */, ''))
+     assert_equal false, d.instance.utc
+   end
+   def test_configure_no_path
+     d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, ''))
+     assert_equal "", d.instance.path
+   end
+   def test_configure_root_path
+     d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path /'))
+     assert_equal "", d.instance.path
+   end
+   def test_configure_path_with_slash
+     d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path log/'))
+     assert_equal "log/", d.instance.path
+   end
+   def test_configure_tsv
+     d1 = create_driver(CONFIG_TSV)
+     assert_equal "tsv", d1.instance.file_type
+     assert_equal "\t", d1.instance.delimiter
+   end
+   def test_configure_json
+     d2 = create_driver(CONFIG_JSON)
+     assert_equal "json", d2.instance.file_type
+     assert_equal "\t", d2.instance.delimiter
+   end
+   def test_configure_original_file_type
+     d3 = create_driver(CONFIG_PIPE_DELIMITER)
+     assert_equal nil, d3.instance.file_type
+     assert_equal "|", d3.instance.delimiter
+
+     d4 = create_driver(CONFIG_PIPE_DELIMITER_WITH_NAME)
+     assert_equal "pipe", d4.instance.file_type
+     assert_equal "|", d4.instance.delimiter
+   end
+
+   def emit_csv(d)
+     d.emit(RECORD_CSV_A, DEFAULT_TIME)
+     d.emit(RECORD_CSV_B, DEFAULT_TIME)
+   end
+   def emit_tsv(d)
+     d.emit(RECORD_TSV_A, DEFAULT_TIME)
+     d.emit(RECORD_TSV_B, DEFAULT_TIME)
+   end
+   def emit_json(d)
+     d.emit(RECORD_JSON_A, DEFAULT_TIME)
+     d.emit(RECORD_JSON_B, DEFAULT_TIME)
+   end
+
+   def test_format_csv
+     d_csv = create_driver_no_write(CONFIG_CSV)
+     emit_csv(d_csv)
+     d_csv.expect_format RECORD_CSV_A['log'] + "\n"
+     d_csv.expect_format RECORD_CSV_B['log'] + "\n"
+     d_csv.run
+   end
+   def test_format_tsv
+     d_tsv = create_driver_no_write(CONFIG_TSV)
+     emit_tsv(d_tsv)
+     d_tsv.expect_format RECORD_TSV_A['log'] + "\n"
+     d_tsv.expect_format RECORD_TSV_B['log'] + "\n"
+     d_tsv.run
+   end
+   def test_format_json
+     d_json = create_driver_no_write(CONFIG_JSON)
+     emit_json(d_json)
+     d_json.expect_format RECORD_JSON_A.to_msgpack
+     d_json.expect_format RECORD_JSON_B.to_msgpack
+     d_json.run
+   end
+
+   class PGConnectionMock
+     def initialize(return_keys=['key_a', 'key_b', 'key_c', 'key_d', 'key_e', 'key_f', 'key_g', 'key_h'])
+       @return_keys = return_keys
+     end
+     def exec(sql, &block)
+       if block_given? and /^select column_name from/ =~ sql
+         yield @return_keys.collect{|key| {'column_name' => key}}
+       end
+     end
+     def close
+     end
+   end
+
+   def setup_pg_mock
+     # create mock of PG
+     def PG.connect(dbinfo)
+       return PGConnectionMock.new
+     end
+   end
+
+   def setup_s3_mock(expected_data)
+     current_time = Time.now
+
+     # create mock of s3 object
+     s3obj = flexmock(AWS::S3::S3Object)
+     s3obj.should_receive(:exists?).with_any_args.and_return { false }
+     s3obj.should_receive(:write).with(
+       # pathname
+       on { |pathname|
+         data = nil
+         pathname.open { |f|
+           gz = Zlib::GzipReader.new(f)
+           data = gz.read
+           gz.close
+         }
+         assert_equal expected_data, data
+       },
+       :acl => :bucket_owner_full_control
+     ).and_return { true }
+
+     # create mock of s3 object collection
+     s3obj_col = flexmock(AWS::S3::ObjectCollection)
+     s3obj_col.should_receive(:[]).with(
+       on { |key|
+         expected_key = current_time.utc.strftime("log/year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M_00.gz")
+         key == expected_key
+       }).
+       and_return {
+         s3obj
+       }
+
+     # create mock of s3 bucket
+     flexmock(AWS::S3::Bucket).new_instances do |bucket|
+       bucket.should_receive(:objects).with_any_args.
+         and_return {
+           s3obj_col
+         }
+     end
+   end
+
+   def setup_mocks(expected_data)
+     setup_pg_mock
+     setup_s3_mock(expected_data)
+   end
+
+   def test_write_with_csv
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+     d_csv = create_driver
+     emit_csv(d_csv)
+     d_csv.run
+   end
+
+   def test_write_with_json
+     setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     d_json.run
+   end
+
+   def test_write_with_json_hash_value
+     setup_mocks("val_a\t\"{\"\"foo\"\":\"\"var\"\"}\"\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : {"foo" : "var"}}]}, DEFAULT_TIME)
+     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+     d_json.run
+   end
+
+   def test_write_with_json_array_value
+     setup_mocks("val_a\t\"[\"\"foo\"\",\"\"var\"\"]\"\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ["foo", "var"]}]}, DEFAULT_TIME)
+     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+     d_json.run
+   end
+
+   def test_write_with_json_no_data
+     setup_mocks("")
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit("", DEFAULT_TIME)
+     d_json.emit("", DEFAULT_TIME)
+     d_json.run
+   end
+
+   def test_write_with_json_invalid_one_line
+     setup_mocks(%[\t\tval_c\tval_d\t\t\t\t\n])
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[}}]}, DEFAULT_TIME)
+     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+     d_json.run
+   end
+
+   def test_write_with_json_no_available_data
+     setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n])
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit(RECORD_JSON_A, DEFAULT_TIME)
+     d_json.emit({"log" => %[{"key_o" : "val_o", "key_p" : "val_p"}]}, DEFAULT_TIME)
+     d_json.run
+   end
+
+   def test_write_redshift_connection_error
+     def PG.connect(dbinfo)
+       return Class.new do
+         def initialize(return_keys=[]); end
+         def exec(sql)
+           raise PG::Error, "redshift connection error"
+         end
+         def close; end
+       end.new
+     end
+     setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+
+     d_csv = create_driver
+     emit_csv(d_csv)
+     assert_raise(PG::Error) {
+       d_csv.run
+     }
+   end
+
+   def test_write_redshift_logic_error
+     PG::Error.module_eval { attr_accessor :result }
+     def PG.connect(dbinfo)
+       return Class.new do
+         def initialize(return_keys=[]); end
+         def exec(sql)
+           error = PG::Error.new("redshift logic error")
+           error.result = "logic error"
+           raise error
+         end
+         def close; end
+       end.new
+     end
+     setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+
+     d_csv = create_driver
+     emit_csv(d_csv)
+     assert_nothing_raised {
+       d_csv.run
+     }
+   end
+
+   def test_write_with_json_redshift_connection_error
+     def PG.connect(dbinfo)
+       return Class.new do
+         def initialize(return_keys=[]); end
+         def exec(sql, &block)
+           error = PG::Error.new("redshift connection error")
+           raise error
+         end
+         def close; end
+       end.new
+     end
+     setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_raise(PG::Error) {
+       d_json.run
+     }
+   end
+
+   def test_write_with_json_no_table_on_redshift
+     def PG.connect(dbinfo)
+       return Class.new do
+         def initialize(return_keys=[]); end
+         def exec(sql, &block)
+           yield [] if block_given?
+         end
+         def close; end
+       end.new
+     end
+     setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_nothing_raised {
+       d_json.run
+     }
+   end
+
+   def test_write_with_json_failed_to_get_columns
+     def PG.connect(dbinfo)
+       return Class.new do
+         def initialize(return_keys=[]); end
+         def exec(sql, &block)
+         end
+         def close; end
+       end.new
+     end
+     setup_s3_mock("")
+
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_raise(RuntimeError, "failed to fetch the redshift table definition.") {
+       d_json.run
+     }
+   end
+
+   def test_write_with_json_failed_to_generate_tsv
+     flexmock(CSV).should_receive(:generate).with_any_args.
+       and_return {
+         raise "failed to generate tsv."
+       }
+     setup_s3_mock("")
+
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_nothing_raised {
+       d_json.run
+     }
+   end
+ end
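
This test file can also be run on its own, in the usual Test::Unit way (illustrative):

    ruby -Ilib -Itest test/plugin/test_out_redshift.rb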
data/test/test_helper.rb ADDED
@@ -0,0 +1,8 @@
+ if ENV['COVERAGE']
+   require 'simplecov'
+   SimpleCov.start do
+     add_filter 'test/'
+     add_filter 'pkg/'
+     add_filter 'vendor/'
+   end
+ end
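
SimpleCov is opt-in here: it only starts when `COVERAGE` is set, which is exactly what the Rakefile's `coverage` task arranges before invoking `test`. The manual equivalent would be:

    COVERAGE=1 bundle exec rake test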
metadata ADDED
@@ -0,0 +1,152 @@
+ --- !ruby/object:Gem::Specification
+ name: fluent-plugin-redshift
+ version: !ruby/object:Gem::Version
+   version: 0.0.1
+ prerelease:
+ platform: ruby
+ authors:
+ - Masashi Miyazaki
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2013-03-07 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: fluentd
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.10.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.10.0
+ - !ruby/object:Gem::Dependency
+   name: aws-sdk
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.6.3
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.6.3
+ - !ruby/object:Gem::Dependency
+   name: pg
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.14.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 0.14.0
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: simplecov
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 0.5.4
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 0.5.4
+ - !ruby/object:Gem::Dependency
+   name: flexmock
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.3.1
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.3.1
+ description: Amazon Redshift output plugin for Fluentd
+ email:
+ - mmasashi@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - README.md
+ - Rakefile
+ - VERSION
+ - fluent-plugin-redshift.gemspec
+ - lib/fluent/plugin/out_redshift.rb
+ - test/plugin/test_out_redshift.rb
+ - test/test_helper.rb
+ homepage: https://github.com/hapyrus/fluent-plugin-redshift
+ licenses: []
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 1.8.23
+ signing_key:
+ specification_version: 3
+ summary: Amazon Redshift output plugin for Fluentd
+ test_files:
+ - test/plugin/test_out_redshift.rb
+ - test/test_helper.rb