fluent-plugin-redshift-out2 0.4.1

checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: 1041bb98af3d4cd15dbe1d4886a31b68bc2c1691771247b589373792429608a7
+   data.tar.gz: d8162bac6c674de9760c16c4f2f6ce381c13d6d3a9b52ea7fa170fba11d21cb0
+ SHA512:
+   metadata.gz: a28646dd88bc2b290bc38ae8860e49f7f841c0a4c24a78f0df98b5c41d2f82919ba5dafe25b5b6d3d16ce66b4d7515792fe506406940f026afe209027e4c94ef
+   data.tar.gz: 3a421e6727ee9f020f8a6a257ed3d966c611d003daa17ed88d288c210a497d3e9317f5fa49dd80b2339577e0b089280e0a2b578404c3e8007a98a503136bb611
data/.gitignore ADDED
@@ -0,0 +1,19 @@
+ *.gem
+ *.rbc
+ *.swp
+ .bundle
+ .config
+ .yardoc
+ Gemfile.lock
+ InstalledFiles
+ _yardoc
+ coverage
+ doc/
+ lib/bundler/man
+ pkg
+ rdoc
+ spec/reports
+ test/tmp
+ test/version_tmp
+ tmp
+ vendor/
data/Gemfile ADDED
@@ -0,0 +1,3 @@
+ source 'https://rubygems.org'
+
+ gemspec
data/README.md ADDED
@@ -0,0 +1,156 @@
+ Amazon Redshift output plugin for Fluentd
+ ========
+
+ ## Overview
+
+ The Amazon Redshift output plugin uploads event logs to an Amazon Redshift cluster. Supported data formats are CSV, TSV, JSON and MessagePack. An S3 bucket and a Redshift cluster are required to use this plugin.
+
+ ## Installation
+
+     gem install fluent-plugin-redshift-out2
+
+ ## Configuration
+
+ Format:
+
+     <match my.tag>
+       type redshift-out
+
+       # s3 (for copying data to redshift)
+       aws_key_id YOUR_AWS_KEY_ID
+       aws_sec_key YOUR_AWS_SECRET_KEY
+       s3_bucket YOUR_S3_BUCKET
+       s3_endpoint YOUR_S3_BUCKET_END_POINT
+       path YOUR_S3_PATH
+       timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
+
+       # redshift
+       redshift_host YOUR_AMAZON_REDSHIFT_CLUSTER_END_POINT
+       redshift_port YOUR_AMAZON_REDSHIFT_CLUSTER_PORT
+       redshift_dbname YOUR_AMAZON_REDSHIFT_CLUSTER_DATABASE_NAME
+       redshift_user YOUR_AMAZON_REDSHIFT_CLUSTER_USER_NAME
+       redshift_password YOUR_AMAZON_REDSHIFT_CLUSTER_PASSWORD
+       redshift_schemaname YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_SCHEMA_NAME
+       redshift_tablename YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_TABLE_NAME
+       file_type [tsv|csv|json|msgpack]
+
+       # buffer
+       buffer_type file
+       buffer_path /var/log/fluent/redshift
+       flush_interval 15m
+       buffer_chunk_limit 1g
+     </match>
+
+ Example (watch and upload json formatted apache log):
+
+     <source>
+       type tail
+       path redshift_test.json
+       pos_file redshift_test_json.pos
+       tag redshift.json
+       format /^(?<log>.*)$/
+     </source>
+
+     <match redshift.json>
+       type redshift-out
+
+       # s3 (for copying data to redshift)
+       aws_key_id YOUR_AWS_KEY_ID
+       aws_sec_key YOUR_AWS_SECRET_KEY
+       s3_bucket hapyrus-example
+       s3_endpoint s3.amazonaws.com
+       path path/on/s3/apache_json_log/
+       timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
+
+       # redshift
+       redshift_host xxx-yyy-zzz.xxxxxxxxxx.us-east-1.redshift.amazonaws.com
+       redshift_port 5439
+       redshift_dbname fluent-redshift-test
+       redshift_user fluent
+       redshift_password fluent-password
+       redshift_tablename apache_log
+       file_type json
+
+       # buffer
+       buffer_type file
+       buffer_path /var/log/fluent/redshift
+       flush_interval 15m
+       buffer_chunk_limit 1g
+     </match>
+
+ + `type` (required) : The value must be `redshift-out`.
+
+ + `aws_key_id` : AWS access key id to access the s3 bucket.
+
+ + `aws_sec_key` : AWS secret access key to access the s3 bucket.
+
+ + `s3_bucket` (required) : s3 bucket name. The s3 bucket must be in the same region as your Redshift cluster.
+
+ + `s3_endpoint` : s3 endpoint.
+
+ + `path` (required) : s3 path to input.
+
+ + `timestamp_key_format` : The format of the object keys. It can include date-format directives. (See the key-generation sketch after this list.)
+
+   - The default is "year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M".
+   - For example, the s3 paths look like the following with the example configuration above.
+     <pre>
+     hapyrus-example/path/on/s3/apache_json_log/year=2013/month=03/day=05/hour=12/20130305-1215_00.gz
+     hapyrus-example/path/on/s3/apache_json_log/year=2013/month=03/day=05/hour=12/20130305-1230_00.gz
+     </pre>
+
+ + `redshift_host` (required) : the endpoint (or hostname) of your Amazon Redshift cluster.
+
+ + `redshift_port` (required) : port number.
+
+ + `redshift_dbname` (required) : database name.
+
+ + `redshift_user` (required) : user name.
+
+ + `redshift_password` (required) : password for the user name.
+
+ + `redshift_tablename` (required) : table name to store data.
+
+ + `redshift_schemaname` : schema name to store data. By default, this option is not set and the table is resolved without a schema, following your search_path.
+
+ + `redshift_connect_timeout` : maximum time to wait for the connection to succeed.
+
+ + `file_type` : file format of the source data. `csv`, `tsv`, `msgpack` or `json` are available.
+
+ + `delimiter` : delimiter of the source data. This option is ignored if `file_type` is specified.
+
+ + `buffer_type` : buffer type.
+
+ + `buffer_path` : path prefix of the files used to buffer logs.
+
+ + `flush_interval` : flush interval.
+
+ + `buffer_chunk_limit` : size limit of each buffer chunk.
+
+ + `utc` : use the UTC time zone. This parameter affects `timestamp_key_format`.
+
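+ As a rough sketch of how those object keys are generated (mirroring the plugin's `create_s3path` logic; the `path/on/s3/...` prefix is just the example configuration above, and the `_00` suffix is incremented until an unused key is found):
+
+ ```ruby
+ require 'time'
+
+ timestamp_key_format = 'year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M'
+ timestamp_key = Time.parse('2013-03-05 12:15 UTC').strftime(timestamp_key_format)
+ suffix = '_%02d' % 0  # bumped while the key already exists on S3
+ s3path = "path/on/s3/apache_json_log/#{timestamp_key}#{suffix}.gz"
+ # => "path/on/s3/apache_json_log/year=2013/month=03/day=05/hour=12/20130305-1215_00.gz"
+ ```
+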
+ ## Logging examples
+ ```ruby
+ # examples by fluent-logger
+ require 'fluent-logger'
+ log = Fluent::Logger::FluentLogger.new(nil, :host => 'localhost', :port => 24224)
+
+ # file_type: csv
+ log.post('your.tag', :log => "12345,12345")
+
+ # file_type: tsv
+ log.post('your.tag', :log => "12345\t12345")
+
+ # file_type: json
+ require 'json'
+ log.post('your.tag', :log => { :user_id => 12345, :data_id => 12345 }.to_json)
+
+ # file_type: msgpack
+ log.post('your.tag', :user_id => 12345, :data_id => 12345)
+ ```
+
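+ For `json` and `msgpack` records, the plugin fetches the target table's column list and writes one delimiter-separated line per record before loading. A minimal sketch of that conversion (based on `hash_to_table_text` in the source; `columns` and `record` here are illustrative):
+
+ ```ruby
+ require 'multi_json'
+
+ columns = ['key_a', 'key_b', 'key_c']  # fetched from INFORMATION_SCHEMA.COLUMNS
+ record  = { 'key_a' => 'val_a', 'key_b' => { 'foo' => 'var' } }
+
+ line = columns.map { |name|
+   value = record[name]
+   case value
+   when nil         then "\\N"                  # NULL marker for COPY
+   when Hash, Array then MultiJson.dump(value)  # nested values are stored as JSON text
+   else value.to_s
+   end
+ }.join("\t") + "\n"
+ # => "val_a\t{\"foo\":\"var\"}\t\\N\n"
+ ```
+
+ The actual implementation additionally escapes tabs, newlines and backslashes, and each flushed chunk is then gzipped, uploaded to S3, and loaded with a single `COPY ... GZIP ESCAPE FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS` statement.
+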
+ ## License
+
+ Copyright (c) 2013 [Hapyrus Inc](http://hapyrus.com)
+
+ [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
data/Rakefile ADDED
@@ -0,0 +1,16 @@
+ require "bundler"
+ Bundler::GemHelper.install_tasks
+ require 'rake/testtask'
+
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'lib' << 'test'
+   test.test_files = FileList['test/plugin/*.rb']
+   test.verbose = true
+ end
+
+ task :coverage do |t|
+   ENV['COVERAGE'] = '1'
+   Rake::Task["test"].invoke
+ end
+
+ task :default => [:build]
data/VERSION ADDED
@@ -0,0 +1 @@
+ 0.4.1
data/fluent-plugin-redshift-out.gemspec ADDED
@@ -0,0 +1,27 @@
+ # -*- encoding: utf-8 -*-
+ $:.push File.expand_path('../lib', __FILE__)
+
+ Gem::Specification.new do |gem|
+   gem.name = "fluent-plugin-redshift-out2"
+   gem.version = File.read("VERSION").strip
+   gem.authors = ["Ertugrul Yilmaz"]
+   gem.email = ["***.***.***@gmail.com"]
+   gem.description = %q{Amazon Redshift output plugin for Fluentd}
+   gem.summary = gem.description
+   gem.homepage = "https://github.com/ertugrulyilmaz/fluent-plugin-redshift-out"
+   gem.has_rdoc = false
+
+   gem.files = `git ls-files`.split($/)
+   gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
+   gem.require_paths = ["lib"]
+
+   gem.add_dependency "fluentd", [">= 0.10.0", "< 2"]
+   gem.add_dependency "aws-sdk-v1", ">= 1.6.3"
+   gem.add_dependency "multi_json", "~> 1.10"
+   gem.add_dependency "yajl-ruby", "~> 1.2"
+   gem.add_dependency "pg", "~> 0.17.0"
+   gem.add_development_dependency "rake"
+   gem.add_development_dependency "simplecov", ">= 0.5.4"
+   gem.add_development_dependency "flexmock", ">= 1.3.1"
+ end
data/lib/fluent/plugin/out_redshift-out.rb ADDED
@@ -0,0 +1,397 @@
+ module Fluent
+
+   class RedshiftOutput < BufferedOutput
+     Fluent::Plugin.register_output('redshift-out', self)
+
+     NULL_CHAR_FOR_COPY = "\\N"
+
+     # ignore load table error. (invalid data format)
+     IGNORE_REDSHIFT_ERROR_REGEXP = /^ERROR: Load into table '[^']+' failed\./
+
+     def initialize
+       super
+       require 'aws-sdk-v1'
+       require 'zlib'
+       require 'time'
+       require 'tempfile'
+       require 'pg'
+       require 'csv'
+       require 'multi_json'
+       require 'yajl'
+       ::MultiJson.use(:yajl)
+     end
+
+     config_param :record_log_tag, :string, :default => 'log'
+     # s3
+     config_param :aws_key_id, :string, :secret => true, :default => nil
+     config_param :aws_sec_key, :string, :secret => true, :default => nil
+     config_param :s3_bucket, :string
+     config_param :s3_region, :string, :default => nil
+     config_param :path, :string, :default => ""
+     config_param :timestamp_key_format, :string, :default => 'year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M'
+     config_param :utc, :bool, :default => false
+     # redshift
+     config_param :redshift_host, :string
+     config_param :redshift_port, :integer, :default => 5439
+     config_param :redshift_dbname, :string
+     config_param :redshift_user, :string
+     config_param :redshift_password, :string, :secret => true
+     config_param :redshift_tablename, :string
+     config_param :redshift_schemaname, :string, :default => nil
+     config_param :redshift_copy_base_options, :string, :default => "FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS"
+     config_param :redshift_copy_options, :string, :default => nil
+     config_param :redshift_connect_timeout, :integer, :default => 10
+     # file format
+     config_param :file_type, :string, :default => nil # json, tsv, csv, msgpack
+     config_param :delimiter, :string, :default => nil
+     # maintenance
+     config_param :maintenance_file_path, :string, :default => nil
+     # for debug
+     config_param :log_suffix, :string, :default => ''
+
+     def configure(conf)
+       super
+       @path = "#{@path}/" unless @path.end_with?('/') # append last slash
+       @path = @path[1..-1] if @path.start_with?('/')  # remove head slash
+       @utc = true if conf['utc']
+       @db_conf = {
+         host: @redshift_host,
+         port: @redshift_port,
+         dbname: @redshift_dbname,
+         user: @redshift_user,
+         password: @redshift_password,
+         connect_timeout: @redshift_connect_timeout
+       }
+       @delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
+       $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
+       @table_name_with_schema = [@redshift_schemaname, @redshift_tablename].compact.join('.')
+       @maintenance_monitor = MaintenanceMonitor.new(@maintenance_file_path)
+     end
+
+     def start
+       super
+       # init s3 conf
+       options = {}
+       if @aws_key_id && @aws_sec_key
+         options[:access_key_id] = @aws_key_id
+         options[:secret_access_key] = @aws_sec_key
+       end
+       options[:region] = @s3_region if @s3_region
+       @s3 = AWS::S3.new(options)
+       @bucket = @s3.buckets[@s3_bucket]
+       @redshift_connection = RedshiftConnection.new(@db_conf)
+       @redshift_connection.connect_start
+     end
+
+     def format(tag, time, record)
+       if json?
+         record.to_msgpack
+       elsif msgpack?
+         { @record_log_tag => record }.to_msgpack
+       else
+         "#{record[@record_log_tag]}\n"
+       end
+     end
+
+     def write(chunk)
+       $log.debug format_log("start creating gz.")
+       @maintenance_monitor.check_maintenance!
+
+       # create a gz file
+       tmp = Tempfile.new("s3-")
+       tmp =
+         if json? || msgpack?
+           create_gz_file_from_structured_data(tmp, chunk, @delimiter)
+         else
+           create_gz_file_from_flat_data(tmp, chunk)
+         end
+
+       # no data -> skip
+       unless tmp
+         $log.debug format_log("received no valid data. ")
+         return false # for debug
+       end
+
+       # create a file path with time format
+       s3path = create_s3path(@bucket, @path)
+
+       # upload gz to s3
+       @bucket.objects[s3path].write(Pathname.new(tmp.path),
+                                     :acl => :bucket_owner_full_control)
+
+       # close temp file
+       tmp.close!
+
+       # copy gz on s3 to redshift
+       s3_uri = "s3://#{@s3_bucket}/#{s3path}"
+       credentials = @s3.client.credential_provider.credentials
+       sql = "copy #{@table_name_with_schema} from '#{s3_uri}'"
+       sql += " CREDENTIALS 'aws_access_key_id=#{credentials[:access_key_id]};aws_secret_access_key=#{credentials[:secret_access_key]}"
+       sql += ";token=#{credentials[:session_token]}" if credentials[:session_token]
+       sql += "' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
+
+       $log.debug format_log("start copying. s3_uri=#{s3_uri}")
+
+       begin
+         @redshift_connection.exec(sql)
+         $log.info format_log("completed copying to redshift. s3_uri=#{s3_uri}")
+       rescue RedshiftError => e
+         if e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
+           $log.error format_log("failed to copy data into redshift due to load error. s3_uri=#{s3_uri}"), :error => e.to_s
+           return false # for debug
+         end
+         raise e
+       end
+       true # for debug
+     end
+
+     protected
+
+     def format_log(message)
+       (@log_suffix and not @log_suffix.empty?) ? "#{message} #{@log_suffix}" : message
+     end
+
+     private
+
+     def json?
+       @file_type == 'json'
+     end
+
+     def msgpack?
+       @file_type == 'msgpack'
+     end
+
+     def create_gz_file_from_flat_data(dst_file, chunk)
+       gzw = nil
+       begin
+         gzw = Zlib::GzipWriter.new(dst_file)
+         chunk.write_to(gzw)
+       ensure
+         gzw.close rescue nil if gzw
+       end
+       dst_file
+     end
+
+     def create_gz_file_from_structured_data(dst_file, chunk, delimiter)
+       # fetch the table definition from redshift
+       redshift_table_columns = @redshift_connection.fetch_table_columns(@redshift_tablename, @redshift_schemaname)
+       if redshift_table_columns == nil
+         raise "failed to fetch the redshift table definition."
+       elsif redshift_table_columns.empty?
+         $log.warn format_log("no table on redshift. table_name=#{@table_name_with_schema}")
+         return nil
+       end
+
+       # convert json to tsv format text
+       gzw = nil
+       begin
+         gzw = Zlib::GzipWriter.new(dst_file)
+         chunk.msgpack_each do |record|
+           next unless record
+           begin
+             hash = json? ? json_to_hash(record[@record_log_tag]) : record[@record_log_tag]
+             tsv_text = hash_to_table_text(redshift_table_columns, hash, delimiter)
+             gzw.write(tsv_text) if tsv_text and not tsv_text.empty?
+           rescue => e
+             $log.error format_log("failed to create table text from #{@file_type}. text=(#{record[@record_log_tag]})"), :error => e.to_s
+             $log.error_backtrace
+           end
+         end
+         return nil unless gzw.pos > 0
+       ensure
+         gzw.close rescue nil if gzw
+       end
+       dst_file
+     end
+
+     def determine_delimiter(file_type)
+       case file_type
+       when 'json', 'msgpack', 'tsv'
+         "\t"
+       when "csv"
+         ','
+       else
+         raise Fluent::ConfigError, "Invalid file_type:#{file_type}."
+       end
+     end
+
+     def json_to_hash(json_text)
+       return nil if json_text.to_s.empty?
+
+       MultiJson.load(json_text)
+     rescue => e
+       $log.warn format_log("failed to parse json. "), :error => e.to_s
+       nil
+     end
+
+     def hash_to_table_text(redshift_table_columns, hash, delimiter)
+       return "" unless hash
+
+       # extract values from hash
+       val_list = redshift_table_columns.collect {|cn| hash[cn]}
+
+       if val_list.all?{|v| v.nil?}
+         $log.warn format_log("no data match for table columns on redshift. data=#{hash} table_columns=#{redshift_table_columns}")
+         return ""
+       end
+
+       generate_line_with_delimiter(val_list, delimiter)
+     end
+
+     def generate_line_with_delimiter(val_list, delimiter)
+       val_list.collect do |val|
+         case val
+         when nil
+           NULL_CHAR_FOR_COPY
+         when ''
+           ''
+         when Hash, Array
+           escape_text_for_copy(MultiJson.dump(val))
+         else
+           escape_text_for_copy(val.to_s)
+         end
+       end.join(delimiter) + "\n"
+     end
+
+     def escape_text_for_copy(val)
+       val.gsub(/\\|\t|\n/, {"\\" => "\\\\", "\t" => "\\\t", "\n" => "\\\n"}) # escape tab, newline and backslash
+     end
+
+     def create_s3path(bucket, path)
+       timestamp_key = (@utc) ? Time.now.utc.strftime(@timestamp_key_format) : Time.now.strftime(@timestamp_key_format)
+       i = 0
+       begin
+         suffix = "_#{'%02d' % i}"
+         s3path = "#{path}#{timestamp_key}#{suffix}.gz"
+         i += 1
+       end while bucket.objects[s3path].exists?
+       s3path
+     end
+
+     class RedshiftError < StandardError
+       def initialize(msg)
+         case msg
+         when PG::Error
+           @pg_error = msg
+           super(msg.to_s)
+           set_backtrace(msg.backtrace)
+         else
+           super
+         end
+       end
+
+       attr_accessor :pg_error
+     end
+
+     class RedshiftConnection
+       REDSHIFT_CONNECT_TIMEOUT = 10.0 # 10sec
+
+       def initialize(db_conf)
+         @db_conf = db_conf
+         @connection = nil
+         ObjectSpace.define_finalizer(self) {
+           close()
+         }
+       end
+
+       attr_reader :db_conf
+
+       def fetch_table_columns(table_name, schema_name)
+         columns = nil
+         exec(fetch_columns_sql(table_name, schema_name)) do |result|
+           columns = result.collect{|row| row['column_name']}
+         end
+         columns
+       end
+
+       def exec(sql, &block)
+         conn = @connection
+         conn = create_redshift_connection if conn.nil?
+         if block
+           conn.exec(sql) {|result| block.call(result)}
+         else
+           conn.exec(sql)
+         end
+       rescue PG::Error => e
+         raise RedshiftError.new(e)
+       ensure
+         conn.close if conn && @connection.nil?
+       end
+
+       def connect_start
+         @connection = create_redshift_connection
+       end
+
+       def close
+         @connection.close rescue nil if @connection
+         @connection = nil
+       end
+
+       private
+
+       def create_redshift_connection
+         hostaddr = IPSocket.getaddress(db_conf[:host])
+         db_conf[:hostaddr] = hostaddr
+
+         conn = PG::Connection.connect_start(db_conf)
+         raise RedshiftError.new("Unable to create a new connection.") unless conn
+         if conn.status == PG::CONNECTION_BAD
+           raise RedshiftError.new("Connection failed: %s" % [ conn.error_message ])
+         end
+
+         socket = conn.socket_io
+         poll_status = PG::PGRES_POLLING_WRITING
+         until poll_status == PG::PGRES_POLLING_OK || poll_status == PG::PGRES_POLLING_FAILED
+           case poll_status
+           when PG::PGRES_POLLING_READING
+             IO.select([socket], nil, nil, REDSHIFT_CONNECT_TIMEOUT) or
+               raise RedshiftError.new("Asynchronous connection timed out!(READING)")
+           when PG::PGRES_POLLING_WRITING
+             IO.select(nil, [socket], nil, REDSHIFT_CONNECT_TIMEOUT) or
+               raise RedshiftError.new("Asynchronous connection timed out!(WRITING)")
+           end
+           poll_status = conn.connect_poll
+         end
+
+         unless conn.status == PG::CONNECTION_OK
+           raise RedshiftError, ("Connect failed: %s" % [conn.error_message.to_s.lines.uniq.join(" ")])
+         end
+
+         conn
+       rescue => e
+         conn.close rescue nil if conn
+         raise RedshiftError.new(e) if e.kind_of?(PG::Error)
+         raise e
+       end
+
+       def fetch_columns_sql(table_name, schema_name = nil)
+         sql = "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'"
+         sql << " and table_schema = '#{schema_name}'" if schema_name
+         sql << " order by ordinal_position;"
+         sql
+       end
+     end
+
+     class MaintenanceError < StandardError
+     end
+
+     class MaintenanceMonitor
+       def initialize(maintenance_file_path)
+         @file_path = maintenance_file_path
+       end
+
+       def in_maintenance?
+         !!(@file_path && File.exists?(@file_path))
+       end
+
+       def check_maintenance!
+         if in_maintenance?
+           raise MaintenanceError.new("Service is in maintenance mode - maintenance_file_path:#{@file_path}")
+         end
+       end
+     end
+   end
+
+ end
data/test/plugin/test_out_redshift.rb ADDED
@@ -0,0 +1,503 @@
+ require 'test_helper'
+
+ require 'fluent/test'
+ require 'fluent/plugin/out_redshift-out'
+ require 'flexmock/test_unit'
+ require 'zlib'
+
+
+ class RedshiftOutputTest < Test::Unit::TestCase
+   def setup
+     require 'aws-sdk-v1'
+     require 'pg'
+     require 'csv'
+     Fluent::Test.setup
+     PG::Error.module_eval { attr_accessor :result }
+   end
+
+   MAINTENANCE_FILE_PATH_FOR_TEST = "/tmp/fluentd_redshift_plugin_test_maintenance"
+
+   CONFIG_BASE = %[
+     aws_key_id test_key_id
+     aws_sec_key test_sec_key
+     s3_bucket test_bucket
+     path log
+     redshift_host test_host
+     redshift_dbname test_db
+     redshift_user test_user
+     redshift_password test_password
+     redshift_tablename test_table
+     buffer_type memory
+     utc
+     log_suffix id:5 host:localhost
+     maintenance_file_path #{MAINTENANCE_FILE_PATH_FOR_TEST}
+   ]
+   CONFIG_CSV = %[
+     #{CONFIG_BASE}
+     file_type csv
+   ]
+   CONFIG_TSV = %[
+     #{CONFIG_BASE}
+     file_type tsv
+   ]
+   CONFIG_JSON = %[
+     #{CONFIG_BASE}
+     file_type json
+   ]
+   CONFIG_JSON_WITH_SCHEMA = %[
+     #{CONFIG_BASE}
+     redshift_schemaname test_schema
+     file_type json
+   ]
+   CONFIG_MSGPACK = %[
+     #{CONFIG_BASE}
+     file_type msgpack
+   ]
+   CONFIG_PIPE_DELIMITER = %[
+     #{CONFIG_BASE}
+     delimiter |
+   ]
+   CONFIG_PIPE_DELIMITER_WITH_NAME = %[
+     #{CONFIG_BASE}
+     file_type pipe
+     delimiter |
+   ]
+   CONFIG = CONFIG_CSV
+
+   RECORD_CSV_A = {"log" => %[val_a,val_b,val_c,val_d]}
+   RECORD_CSV_B = {"log" => %[val_e,val_f,val_g,val_h]}
+   RECORD_TSV_A = {"log" => %[val_a\tval_b\tval_c\tval_d]}
+   RECORD_TSV_B = {"log" => %[val_e\tval_f\tval_g\tval_h]}
+   RECORD_JSON_A = {"log" => %[{"key_a" : "val_a", "key_b" : "val_b"}]}
+   RECORD_JSON_B = {"log" => %[{"key_c" : "val_c", "key_d" : "val_d"}]}
+   RECORD_MSGPACK_A = {"key_a" => "val_a", "key_b" => "val_b"}
+   RECORD_MSGPACK_B = {"key_c" => "val_c", "key_d" => "val_d"}
+   DEFAULT_TIME = Time.parse("2013-03-06 12:15:02 UTC").to_i
+
+   def create_driver(conf = CONFIG, tag = 'test.input')
+     Fluent::Test::BufferedOutputTestDriver.new(Fluent::RedshiftOutput, tag).configure(conf)
+   end
+
+   def create_driver_no_write(conf = CONFIG, tag = 'test.input')
+     Fluent::Test::BufferedOutputTestDriver.new(Fluent::RedshiftOutput, tag) do
+       def write(chunk)
+         chunk.read
+       end
+     end.configure(conf)
+   end
+
+ def test_configure
90
+ assert_raise(Fluent::ConfigError) {
91
+ d = create_driver('')
92
+ }
93
+ assert_raise(Fluent::ConfigError) {
94
+ d = create_driver(CONFIG_BASE)
95
+ }
96
+ d = create_driver(CONFIG_CSV)
97
+ assert_equal "test_key_id", d.instance.aws_key_id
98
+ assert_equal "test_sec_key", d.instance.aws_sec_key
99
+ assert_equal "test_bucket", d.instance.s3_bucket
100
+ assert_equal "log/", d.instance.path
101
+ assert_equal "test_host", d.instance.redshift_host
102
+ assert_equal 5439, d.instance.redshift_port
103
+ assert_equal "test_db", d.instance.redshift_dbname
104
+ assert_equal "test_user", d.instance.redshift_user
105
+ assert_equal "test_password", d.instance.redshift_password
106
+ assert_equal "test_table", d.instance.redshift_tablename
107
+ assert_equal nil, d.instance.redshift_schemaname
108
+ assert_equal "FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS", d.instance.redshift_copy_base_options
109
+ assert_equal nil, d.instance.redshift_copy_options
110
+ assert_equal "csv", d.instance.file_type
111
+ assert_equal ",", d.instance.delimiter
112
+ assert_equal true, d.instance.utc
113
+ assert_equal MAINTENANCE_FILE_PATH_FOR_TEST, d.instance.maintenance_file_path
114
+ end
115
+ def test_configure_with_schemaname
116
+ d = create_driver(CONFIG_JSON_WITH_SCHEMA)
117
+ assert_equal "test_schema", d.instance.redshift_schemaname
118
+ end
119
+ def test_configure_localtime
120
+ d = create_driver(CONFIG_CSV.gsub(/ *utc */, ''))
121
+ assert_equal false, d.instance.utc
122
+ end
123
+ def test_configure_no_path
124
+ d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, ''))
125
+ assert_equal "", d.instance.path
126
+ end
127
+ def test_configure_root_path
128
+ d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path /'))
129
+ assert_equal "", d.instance.path
130
+ end
131
+ def test_configure_path_with_slash
132
+ d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path log/'))
133
+ assert_equal "log/", d.instance.path
134
+ end
135
+ def test_configure_path_starts_with_slash
136
+ d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path /log/'))
137
+ assert_equal "log/", d.instance.path
138
+ end
139
+ def test_configure_path_starts_with_slash_without_last_slash
140
+ d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path /log'))
141
+ assert_equal "log/", d.instance.path
142
+ end
143
+ def test_configure_tsv
144
+ d1 = create_driver(CONFIG_TSV)
145
+ assert_equal "tsv", d1.instance.file_type
146
+ assert_equal "\t", d1.instance.delimiter
147
+ end
148
+ def test_configure_json
149
+ d2 = create_driver(CONFIG_JSON)
150
+ assert_equal "json", d2.instance.file_type
151
+ assert_equal "\t", d2.instance.delimiter
152
+ end
153
+ def test_configure_msgpack
154
+ d2 = create_driver(CONFIG_MSGPACK)
155
+ assert_equal "msgpack", d2.instance.file_type
156
+ assert_equal "\t", d2.instance.delimiter
157
+ end
158
+ def test_configure_original_file_type
159
+ d3 = create_driver(CONFIG_PIPE_DELIMITER)
160
+ assert_equal nil, d3.instance.file_type
161
+ assert_equal "|", d3.instance.delimiter
162
+
163
+ d4 = create_driver(CONFIG_PIPE_DELIMITER_WITH_NAME)
164
+ assert_equal "pipe", d4.instance.file_type
165
+ assert_equal "|", d4.instance.delimiter
166
+ end
167
+ def test_configure_no_log_suffix
168
+ d = create_driver(CONFIG_CSV.gsub(/ *log_suffix *.+$/, ''))
169
+ assert_equal "", d.instance.log_suffix
170
+ end
171
+
+   def emit_csv(d)
+     d.emit(RECORD_CSV_A, DEFAULT_TIME)
+     d.emit(RECORD_CSV_B, DEFAULT_TIME)
+   end
+
+   def emit_tsv(d)
+     d.emit(RECORD_TSV_A, DEFAULT_TIME)
+     d.emit(RECORD_TSV_B, DEFAULT_TIME)
+   end
+
+   def emit_json(d)
+     d.emit(RECORD_JSON_A, DEFAULT_TIME)
+     d.emit(RECORD_JSON_B, DEFAULT_TIME)
+   end
+
+   def emit_msgpack(d)
+     d.emit(RECORD_MSGPACK_A, DEFAULT_TIME)
+     d.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
+   end
+
+   def test_format_csv
+     setup_mocks("")
+     d_csv = create_driver_no_write(CONFIG_CSV)
+     emit_csv(d_csv)
+     d_csv.expect_format RECORD_CSV_A['log'] + "\n"
+     d_csv.expect_format RECORD_CSV_B['log'] + "\n"
+     d_csv.run
+   end
+
+   def test_format_tsv
+     setup_mocks("")
+     d_tsv = create_driver_no_write(CONFIG_TSV)
+     emit_tsv(d_tsv)
+     d_tsv.expect_format RECORD_TSV_A['log'] + "\n"
+     d_tsv.expect_format RECORD_TSV_B['log'] + "\n"
+     d_tsv.run
+   end
+
+   def test_format_json
+     setup_mocks("")
+     d_json = create_driver_no_write(CONFIG_JSON)
+     emit_json(d_json)
+     d_json.expect_format RECORD_JSON_A.to_msgpack
+     d_json.expect_format RECORD_JSON_B.to_msgpack
+     d_json.run
+   end
+
+   def test_format_msgpack
+     setup_mocks("")
+     d_msgpack = create_driver_no_write(CONFIG_MSGPACK)
+     emit_msgpack(d_msgpack)
+     d_msgpack.expect_format({ 'log' => RECORD_MSGPACK_A }.to_msgpack)
+     d_msgpack.expect_format({ 'log' => RECORD_MSGPACK_B }.to_msgpack)
+     d_msgpack.run
+   end
+
+   def setup_redshift_connection_mock(options = {})
+     options ||= {}
+     column_names = options[:column_names] || ['key_a', 'key_b', 'key_c', 'key_d', 'key_e', 'key_f', 'key_g', 'key_h']
+     schema_name = options[:schema_name]
+     table_name = options[:table_name] || 'test_table'
+     exec_sql_proc = options[:exec_sql_proc]
+
+     column_list_query_regex =
+       if schema_name
+         /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}' and table_schema = '#{schema_name}'/
+       else
+         /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'/
+       end
+     copy_query_regex =
+       if schema_name
+         /\Acopy #{schema_name}.#{table_name} from/
+       else
+         /\Acopy #{table_name} from/
+       end
+
+     flexmock(Fluent::RedshiftOutput::RedshiftConnection).new_instances do |conn|
+       conn.should_receive(:exec).and_return do |sql, block|
+         if exec_sql_proc
+           exec_sql_proc.call(sql, block)
+         elsif block
+           if sql =~ column_list_query_regex
+             block.call column_names.collect{|key| {'column_name' => key}}
+           else
+             block.call []
+           end
+         else
+           unless sql =~ copy_query_regex
+             error = PG::Error.new("ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details.")
+             error.result = "ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details."
+             raise Fluent::RedshiftOutput::RedshiftError.new(error)
+           end
+         end
+       end
+       conn.should_receive(:connect_start)
+     end
+   end
+
+   def setup_s3_mock(expected_data)
+     current_time = Time.now
+
+     # create mock of s3 object
+     s3obj = flexmock(AWS::S3::S3Object)
+     s3obj.should_receive(:exists?).with_any_args.and_return { false }
+     s3obj.should_receive(:write).with(
+       # pathname
+       on { |pathname|
+         data = nil
+         pathname.open { |f|
+           gz = Zlib::GzipReader.new(f)
+           data = gz.read
+           gz.close
+         }
+         assert_equal expected_data, data
+       },
+       :acl => :bucket_owner_full_control
+     ).and_return { true }
+
+     # create mock of s3 object collection
+     s3obj_col = flexmock(AWS::S3::ObjectCollection)
+     s3obj_col.should_receive(:[]).with(
+       on { |key|
+         expected_key = current_time.utc.strftime("log/year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M_00.gz")
+         key == expected_key
+       }).
+       and_return {
+         s3obj
+       }
+
+     # create mock of s3 bucket
+     flexmock(AWS::S3::Bucket).new_instances do |bucket|
+       bucket.should_receive(:objects).with_any_args.
+         and_return {
+           s3obj_col
+         }
+     end
+   end
+
+   def setup_tempfile_mock_to_be_closed
+     flexmock(Tempfile).new_instances.should_receive(:close!).at_least.once
+   end
+
+   def setup_mocks(expected_data, options = {})
+     setup_redshift_connection_mock(options)
+     setup_s3_mock(expected_data)
+   end
+
+   def test_write_with_csv
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+     setup_tempfile_mock_to_be_closed
+     d_csv = create_driver
+     emit_csv(d_csv)
+     assert_equal true, d_csv.run
+   end
+
+   def test_write_with_json
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
+     setup_tempfile_mock_to_be_closed
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_equal true, d_json.run
+   end
+
+   def test_write_with_json_hash_value
+     setup_mocks("val_a\t{\"foo\":\"var\"}\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : {"foo" : "var"}}]}, DEFAULT_TIME)
+     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+     assert_equal true, d_json.run
+   end
+
+   def test_write_with_json_array_value
+     setup_mocks("val_a\t[\"foo\",\"var\"]\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ["foo", "var"]}]}, DEFAULT_TIME)
+     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+     assert_equal true, d_json.run
+   end
+
+   def test_write_with_json_including_tab_newline_quote
+     setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[{"key_a" : "val_a_with_\\t_tab_\\n_newline", "key_b" : "val_b_with_\\\\_quote"}]}, DEFAULT_TIME)
+     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+     assert_equal true, d_json.run
+   end
+
+   def test_write_with_json_empty_text_value
+     setup_mocks(%[val_a\t\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ""}]}, DEFAULT_TIME)
+     assert_equal true, d_json.run
+   end
+
+   def test_write_with_json_no_data
+     setup_mocks("")
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit("", DEFAULT_TIME)
+     d_json.emit("", DEFAULT_TIME)
+     assert_equal false, d_json.run
+   end
+
+   def test_write_with_json_invalid_one_line
+     setup_mocks(%[\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[}}]}, DEFAULT_TIME)
+     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+     assert_equal true, d_json.run
+   end
+
+   def test_write_with_json_no_available_data
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit(RECORD_JSON_A, DEFAULT_TIME)
+     d_json.emit({"log" => %[{"key_o" : "val_o", "key_p" : "val_p"}]}, DEFAULT_TIME)
+     assert_equal true, d_json.run
+   end
+
+   def test_write_with_msgpack
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
+     d_msgpack = create_driver(CONFIG_MSGPACK)
+     emit_msgpack(d_msgpack)
+     assert_equal true, d_msgpack.run
+   end
+
+   def test_write_with_msgpack_hash_value
+     setup_mocks("val_a\t{\"foo\":\"var\"}\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
+     d_msgpack = create_driver(CONFIG_MSGPACK)
+     d_msgpack.emit({"key_a" => "val_a", "key_b" => {"foo" => "var"}}, DEFAULT_TIME)
+     d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
+     assert_equal true, d_msgpack.run
+   end
+
+   def test_write_with_msgpack_array_value
+     setup_mocks("val_a\t[\"foo\",\"var\"]\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
+     d_msgpack = create_driver(CONFIG_MSGPACK)
+     d_msgpack.emit({"key_a" => "val_a", "key_b" => ["foo", "var"]}, DEFAULT_TIME)
+     d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
+     assert_equal true, d_msgpack.run
+   end
+
+   def test_write_with_msgpack_including_tab_newline_quote
+     setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
+     d_msgpack = create_driver(CONFIG_MSGPACK)
+     d_msgpack.emit({"key_a" => "val_a_with_\t_tab_\n_newline", "key_b" => "val_b_with_\\_quote"}, DEFAULT_TIME)
+     d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
+     assert_equal true, d_msgpack.run
+   end
+
+   def test_write_with_msgpack_no_data
+     setup_mocks("")
+     d_msgpack = create_driver(CONFIG_MSGPACK)
+     d_msgpack.emit({}, DEFAULT_TIME)
+     d_msgpack.emit({}, DEFAULT_TIME)
+     assert_equal false, d_msgpack.run
+   end
+
+   def test_write_with_msgpack_no_available_data
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
+     d_msgpack = create_driver(CONFIG_MSGPACK)
+     d_msgpack.emit(RECORD_MSGPACK_A, DEFAULT_TIME)
+     d_msgpack.emit({"key_o" => "val_o", "key_p" => "val_p"}, DEFAULT_TIME)
+     assert_equal true, d_msgpack.run
+   end
+
+   def test_write_redshift_connection_error
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                 exec_sql_proc: Proc.new {|sql, block| raise Fluent::RedshiftOutput::RedshiftError, "redshift connection error" })
+     d_csv = create_driver
+     emit_csv(d_csv)
+     assert_raise(Fluent::RedshiftOutput::RedshiftError) {
+       d_csv.run
+     }
+   end
+
+   def test_write_redshift_load_error
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                 exec_sql_proc: Proc.new {|sql, block|
+                   msg = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
+                   raise Fluent::RedshiftOutput::RedshiftError.new(msg)
+                 })
+
+     d_csv = create_driver
+     emit_csv(d_csv)
+     assert_equal false, d_csv.run
+   end
+
+   def test_write_with_json_redshift_connection_error
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                 exec_sql_proc: Proc.new {|sql, block| raise Fluent::RedshiftOutput::RedshiftError.new("redshift connection error")})
+
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_raise(Fluent::RedshiftOutput::RedshiftError) {
+       d_json.run
+     }
+   end
+
+   def test_write_with_json_no_table_on_redshift
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                 exec_sql_proc: Proc.new {|sql, block| block.call [] if block })
+
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_equal false, d_json.run
+   end
+
+   def test_write_with_json_failed_to_get_columns
+     setup_mocks("", exec_sql_proc: Proc.new {|sql, block| nil})
+
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_raise(RuntimeError, "failed to fetch the redshift table definition.") {
+       d_json.run
+     }
+   end
+
+   def test_write_with_json_fetch_column_with_schema
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n],
+                 schema_name: 'test_schema')
+     d_json = create_driver(CONFIG_JSON_WITH_SCHEMA)
+     emit_json(d_json)
+     assert_equal true, d_json.run
+   end
+
+   def test_maintenance_mode
+     setup_mocks("")
+     flexmock(File).should_receive(:exists?).with(MAINTENANCE_FILE_PATH_FOR_TEST).and_return(true)
+
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_raise(Fluent::RedshiftOutput::MaintenanceError,
+                  "Service is in maintenance mode - maintenance_file_path:#{MAINTENANCE_FILE_PATH_FOR_TEST}") {
+       d_json.run
+     }
+   end
+ end
data/test/test_helper.rb ADDED
@@ -0,0 +1,8 @@
+ if ENV['COVERAGE']
+   require 'simplecov'
+   SimpleCov.start do
+     add_filter 'test/'
+     add_filter 'pkg/'
+     add_filter 'vendor/'
+   end
+ end
metadata ADDED
@@ -0,0 +1,171 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-redshift-out2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.1
5
+ platform: ruby
6
+ authors:
7
+ - Ertugrul Yilmaz
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-07-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: fluentd
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.10.0
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '2'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: 0.10.0
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '2'
33
+ - !ruby/object:Gem::Dependency
34
+ name: aws-sdk-v1
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: 1.6.3
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.6.3
47
+ - !ruby/object:Gem::Dependency
48
+ name: multi_json
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '1.10'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '1.10'
61
+ - !ruby/object:Gem::Dependency
62
+ name: yajl-ruby
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '1.2'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.2'
75
+ - !ruby/object:Gem::Dependency
76
+ name: pg
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: 0.17.0
82
+ type: :runtime
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: 0.17.0
89
+ - !ruby/object:Gem::Dependency
90
+ name: rake
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ - !ruby/object:Gem::Dependency
104
+ name: simplecov
105
+ requirement: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: 0.5.4
110
+ type: :development
111
+ prerelease: false
112
+ version_requirements: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: 0.5.4
117
+ - !ruby/object:Gem::Dependency
118
+ name: flexmock
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: 1.3.1
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: 1.3.1
131
+ description: Amazon Redshift output plugin for Fluentd
132
+ email:
133
+ - "***.***.***@gmail.com"
134
+ executables: []
135
+ extensions: []
136
+ extra_rdoc_files: []
137
+ files:
138
+ - ".gitignore"
139
+ - Gemfile
140
+ - README.md
141
+ - Rakefile
142
+ - VERSION
143
+ - fluent-plugin-redshift-out.gemspec
144
+ - lib/fluent/plugin/out_redshift-out.rb
145
+ - test/plugin/test_out_redshift.rb
146
+ - test/test_helper.rb
147
+ homepage: https://github.com/ertugrulyilmaz/fluent-plugin-redshift-out
148
+ licenses: []
149
+ metadata: {}
150
+ post_install_message:
151
+ rdoc_options: []
152
+ require_paths:
153
+ - lib
154
+ required_ruby_version: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - ">="
157
+ - !ruby/object:Gem::Version
158
+ version: '0'
159
+ required_rubygems_version: !ruby/object:Gem::Requirement
160
+ requirements:
161
+ - - ">="
162
+ - !ruby/object:Gem::Version
163
+ version: '0'
164
+ requirements: []
165
+ rubygems_version: 3.0.3
166
+ signing_key:
167
+ specification_version: 4
168
+ summary: Amazon Redshift output plugin for Fluentd
169
+ test_files:
170
+ - test/plugin/test_out_redshift.rb
171
+ - test/test_helper.rb