fluent-plugin-redshift-out2 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: 1041bb98af3d4cd15dbe1d4886a31b68bc2c1691771247b589373792429608a7
+   data.tar.gz: d8162bac6c674de9760c16c4f2f6ce381c13d6d3a9b52ea7fa170fba11d21cb0
+ SHA512:
+   metadata.gz: a28646dd88bc2b290bc38ae8860e49f7f841c0a4c24a78f0df98b5c41d2f82919ba5dafe25b5b6d3d16ce66b4d7515792fe506406940f026afe209027e4c94ef
+   data.tar.gz: 3a421e6727ee9f020f8a6a257ed3d966c611d003daa17ed88d288c210a497d3e9317f5fa49dd80b2339577e0b089280e0a2b578404c3e8007a98a503136bb611
data/.gitignore ADDED
@@ -0,0 +1,19 @@
+ *.gem
+ *.rbc
+ *.swp
+ .bundle
+ .config
+ .yardoc
+ Gemfile.lock
+ InstalledFiles
+ _yardoc
+ coverage
+ doc/
+ lib/bundler/man
+ pkg
+ rdoc
+ spec/reports
+ test/tmp
+ test/version_tmp
+ tmp
+ vendor/
data/Gemfile ADDED
@@ -0,0 +1,3 @@
+ source 'https://rubygems.org'
+
+ gemspec
data/README.md ADDED
@@ -0,0 +1,156 @@
+ Amazon Redshift output plugin for Fluentd
+ ========
+
+ ## Overview
+
+ The Amazon Redshift output plugin uploads event logs to an Amazon Redshift cluster. Supported data formats are csv, tsv, json and msgpack. An S3 bucket and a Redshift cluster are required to use this plugin.
+
+ ## Installation
+
+     gem install fluent-plugin-redshift-out2
+
+ ## Configuration
+
+ Format:
+
+     <match my.tag>
+       type redshift-out
+
+       # s3 (for copying data to redshift)
+       aws_key_id YOUR_AWS_KEY_ID
+       aws_sec_key YOUR_AWS_SECRET_KEY
+       s3_bucket YOUR_S3_BUCKET
+       s3_endpoint YOUR_S3_BUCKET_END_POINT
+       path YOUR_S3_PATH
+       timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
+
+       # redshift
+       redshift_host YOUR_AMAZON_REDSHIFT_CLUSTER_END_POINT
+       redshift_port YOUR_AMAZON_REDSHIFT_CLUSTER_PORT
+       redshift_dbname YOUR_AMAZON_REDSHIFT_CLUSTER_DATABASE_NAME
+       redshift_user YOUR_AMAZON_REDSHIFT_CLUSTER_USER_NAME
+       redshift_password YOUR_AMAZON_REDSHIFT_CLUSTER_PASSWORD
+       redshift_schemaname YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_SCHEMA_NAME
+       redshift_tablename YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_TABLE_NAME
+       file_type [tsv|csv|json|msgpack]
+
+       # buffer
+       buffer_type file
+       buffer_path /var/log/fluent/redshift
+       flush_interval 15m
+       buffer_chunk_limit 1g
+     </match>
+
+ Example (watch and upload json formatted apache log):
+
+     <source>
+       type tail
+       path redshift_test.json
+       pos_file redshift_test_json.pos
+       tag redshift.json
+       format /^(?<log>.*)$/
+     </source>
+
+     <match redshift.json>
+       type redshift-out
+
+       # s3 (for copying data to redshift)
+       aws_key_id YOUR_AWS_KEY_ID
+       aws_sec_key YOUR_AWS_SECRET_KEY
+       s3_bucket hapyrus-example
+       s3_endpoint s3.amazonaws.com
+       path path/on/s3/apache_json_log/
+       timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
+
+       # redshift
+       redshift_host xxx-yyy-zzz.xxxxxxxxxx.us-east-1.redshift.amazonaws.com
+       redshift_port 5439
+       redshift_dbname fluent-redshift-test
+       redshift_user fluent
+       redshift_password fluent-password
+       redshift_tablename apache_log
+       file_type json
+
+       # buffer
+       buffer_type file
+       buffer_path /var/log/fluent/redshift
+       flush_interval 15m
+       buffer_chunk_limit 1g
+     </match>
+
+ + `type` (required) : The value must be `redshift-out`.
+
+ + `aws_key_id` : AWS access key ID used to access the S3 bucket.
+
+ + `aws_sec_key` : AWS secret access key used to access the S3 bucket.
+
+ + `s3_bucket` (required) : S3 bucket name. The bucket must be in the same region as your Redshift cluster.
+
+ + `s3_endpoint` : S3 endpoint.
+
+ + `path` (required) : S3 key prefix under which the gzipped log files are placed.
+
+ + `timestamp_key_format` : The format of the object keys. It can include strftime date directives.
+
+   - The default is `year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M`.
+   - For example, the S3 keys look as follows with the above example configuration:
+ <pre>
+ hapyrus-example/path/on/s3/apache_json_log/year=2013/month=03/day=05/hour=12/20130305-1215_00.gz
+ hapyrus-example/path/on/s3/apache_json_log/year=2013/month=03/day=05/hour=12/20130305-1230_00.gz
+ </pre>
+
+ + `redshift_host` (required) : the endpoint (or hostname) of your Amazon Redshift cluster.
+
+ + `redshift_port` (required) : port number.
+
+ + `redshift_dbname` (required) : database name.
+
+ + `redshift_user` (required) : user name.
+
+ + `redshift_password` (required) : password for the user name.
+
+ + `redshift_tablename` (required) : table name to store data.
+
+ + `redshift_schemaname` : schema name of the target table. By default this option is not set, and the table is resolved without a schema, following your search_path.
+
+ + `redshift_connect_timeout` : maximum time to wait for the connection to succeed.
+
+ + `file_type` : file format of the source data. `csv`, `tsv`, `msgpack` or `json` are available.
+
+ + `delimiter` : delimiter of the source data. If not set, it is determined from `file_type` (tab for `tsv`, `json` and `msgpack`; comma for `csv`).
+
+ + `buffer_type` : buffer type.
+
+ + `buffer_path` : path prefix of the buffer files.
+
+ + `flush_interval` : flush interval.
+
+ + `buffer_chunk_limit` : maximum size of each buffer chunk.
+
+ + `utc` : use UTC as the time zone. This parameter affects `timestamp_key_format`.
+
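+ For `json` and `msgpack`, each record is matched against the target table's column names and converted into a delimited text line before upload; columns missing from a record are written as `\N` (NULL for COPY). An illustrative sketch of the conversion, assuming a table whose columns are `key_a` and `key_b`:
+
+ <pre>
+ {"key_a" : "val_a"}   =>   val_a\t\N
+ </pre>
+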
+ ## Logging examples
+ ```ruby
+ # examples by fluent-logger
+ require 'fluent-logger'
+ log = Fluent::Logger::FluentLogger.new(nil, :host => 'localhost', :port => 24224)
+
+ # file_type: csv
+ log.post('your.tag', :log => "12345,12345")
+
+ # file_type: tsv
+ log.post('your.tag', :log => "12345\t12345")
+
+ # file_type: json
+ require 'json'
+ log.post('your.tag', :log => { :user_id => 12345, :data_id => 12345 }.to_json)
+
+ # file_type: msgpack
+ log.post('your.tag', :user_id => 12345, :data_id => 12345)
+ ```
+
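+ Note that with `json` and `msgpack` the record keys must match the target table's column names: values whose keys have no matching column are dropped, and a record that matches no column at all is skipped with a warning.
+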
+ ## License
+
+ Copyright (c) 2013 [Hapyrus Inc](http://hapyrus.com)
+
+ [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
data/Rakefile ADDED
@@ -0,0 +1,16 @@
+ require "bundler"
+ Bundler::GemHelper.install_tasks
+ require 'rake/testtask'
+
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'lib' << 'test'
+   test.test_files = FileList['test/plugin/*.rb']
+   test.verbose = true
+ end
+
+ task :coverage do |t|
+   ENV['COVERAGE'] = '1'
+   Rake::Task["test"].invoke
+ end
+
+ task :default => [:build]
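
A brief usage note: the default task above builds the gem, `rake test` runs the tests under test/plugin, and the `coverage` task simply sets `COVERAGE=1` before invoking `test`, which enables SimpleCov via test/test_helper.rb below.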
data/VERSION ADDED
@@ -0,0 +1 @@
+ 0.4.1
data/fluent-plugin-redshift-out.gemspec ADDED
@@ -0,0 +1,27 @@
+ # -*- encoding: utf-8 -*-
+ $:.push File.expand_path('../lib', __FILE__)
+
+ Gem::Specification.new do |gem|
+   gem.name          = "fluent-plugin-redshift-out2"
+   gem.version       = File.read("VERSION").strip
+   gem.authors       = ["Ertugrul Yilmaz"]
+   gem.email         = ["***.***.***@gmail.com"]
+   gem.description   = %q{Amazon Redshift output plugin for Fluentd}
+   gem.summary       = gem.description
+   gem.homepage      = "https://github.com/ertugrulyilmaz/fluent-plugin-redshift-out"
+   gem.has_rdoc      = false
+
+   gem.files         = `git ls-files`.split($/)
+   gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+   gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+   gem.require_paths = ["lib"]
+
+   gem.add_dependency "fluentd", [">= 0.10.0", "< 2"]
+   gem.add_dependency "aws-sdk-v1", ">= 1.6.3"
+   gem.add_dependency "multi_json", "~> 1.10"
+   gem.add_dependency "yajl-ruby", "~> 1.2"
+   gem.add_dependency "pg", "~> 0.17.0"
+   gem.add_development_dependency "rake"
+   gem.add_development_dependency "simplecov", ">= 0.5.4"
+   gem.add_development_dependency "flexmock", ">= 1.3.1"
+ end
data/lib/fluent/plugin/out_redshift-out.rb ADDED
@@ -0,0 +1,397 @@
+ module Fluent
+
+
+   class RedshiftOutput < BufferedOutput
+     Fluent::Plugin.register_output('redshift-out', self)
+
+     NULL_CHAR_FOR_COPY = "\\N"
+
+     # ignore load table error. (invalid data format)
+     IGNORE_REDSHIFT_ERROR_REGEXP = /^ERROR: Load into table '[^']+' failed\./
+
+     def initialize
+       super
+       require 'aws-sdk-v1'
+       require 'zlib'
+       require 'time'
+       require 'tempfile'
+       require 'pg'
+       require 'csv'
+       require 'multi_json'
+       require 'yajl'
+       ::MultiJson.use(:yajl)
+     end
+
+     config_param :record_log_tag, :string, :default => 'log'
+     # s3
+     config_param :aws_key_id, :string, :secret => true, :default => nil
+     config_param :aws_sec_key, :string, :secret => true, :default => nil
+     config_param :s3_bucket, :string
+     config_param :s3_region, :string, :default => nil
+     config_param :path, :string, :default => ""
+     config_param :timestamp_key_format, :string, :default => 'year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M'
+     config_param :utc, :bool, :default => false
+     # redshift
+     config_param :redshift_host, :string
+     config_param :redshift_port, :integer, :default => 5439
+     config_param :redshift_dbname, :string
+     config_param :redshift_user, :string
+     config_param :redshift_password, :string, :secret => true
+     config_param :redshift_tablename, :string
+     config_param :redshift_schemaname, :string, :default => nil
+     config_param :redshift_copy_base_options, :string, :default => "FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS"
+     config_param :redshift_copy_options, :string, :default => nil
+     config_param :redshift_connect_timeout, :integer, :default => 10
+     # file format
+     config_param :file_type, :string, :default => nil # json, tsv, csv, msgpack
+     config_param :delimiter, :string, :default => nil
+     # maintenance
+     config_param :maintenance_file_path, :string, :default => nil
+     # for debug
+     config_param :log_suffix, :string, :default => ''
+
+     def configure(conf)
+       super
+       @path = "#{@path}/" unless @path.end_with?('/') # append last slash
+       @path = @path[1..-1] if @path.start_with?('/')  # remove head slash
+       @utc = true if conf['utc']
+       @db_conf = {
+         host: @redshift_host,
+         port: @redshift_port,
+         dbname: @redshift_dbname,
+         user: @redshift_user,
+         password: @redshift_password,
+         connect_timeout: @redshift_connect_timeout
+       }
+       @delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
+       $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
+       @table_name_with_schema = [@redshift_schemaname, @redshift_tablename].compact.join('.')
+       @maintenance_monitor = MaintenanceMonitor.new(@maintenance_file_path)
+     end
+
+     def start
+       super
+       # init s3 conf
+       options = {}
+       if @aws_key_id && @aws_sec_key
+         options[:access_key_id] = @aws_key_id
+         options[:secret_access_key] = @aws_sec_key
+       end
+       options[:region] = @s3_region if @s3_region
+       @s3 = AWS::S3.new(options)
+       @bucket = @s3.buckets[@s3_bucket]
+       @redshift_connection = RedshiftConnection.new(@db_conf)
+       @redshift_connection.connect_start
+     end
+
+     def format(tag, time, record)
+       if json?
+         record.to_msgpack
+       elsif msgpack?
+         { @record_log_tag => record }.to_msgpack
+       else
+         "#{record[@record_log_tag]}\n"
+       end
+     end
+
+     def write(chunk)
+       $log.debug format_log("start creating gz.")
+       @maintenance_monitor.check_maintenance!
+
+       # create a gz file
+       tmp = Tempfile.new("s3-")
+       tmp =
+         if json? || msgpack?
+           create_gz_file_from_structured_data(tmp, chunk, @delimiter)
+         else
+           create_gz_file_from_flat_data(tmp, chunk)
+         end
+
+       # no data -> skip
+       unless tmp
+         $log.debug format_log("received no valid data. ")
+         return false # for debug
+       end
+
+       # create a file path with time format
+       s3path = create_s3path(@bucket, @path)
+
+       # upload gz to s3
+       @bucket.objects[s3path].write(Pathname.new(tmp.path),
+                                     :acl => :bucket_owner_full_control)
+
+       # close temp file
+       tmp.close!
+
+       # copy gz on s3 to redshift
+       s3_uri = "s3://#{@s3_bucket}/#{s3path}"
+       credentials = @s3.client.credential_provider.credentials
+       sql = "copy #{@table_name_with_schema} from '#{s3_uri}'"
+       sql += " CREDENTIALS 'aws_access_key_id=#{credentials[:access_key_id]};aws_secret_access_key=#{credentials[:secret_access_key]}"
+       sql += ";token=#{credentials[:session_token]}" if credentials[:session_token]
+       sql += "' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
+
+       $log.debug format_log("start copying. s3_uri=#{s3_uri}")
+
+       begin
+         @redshift_connection.exec(sql)
+         $log.info format_log("completed copying to redshift. s3_uri=#{s3_uri}")
+       rescue RedshiftError => e
+         if e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
+           $log.error format_log("failed to copy data into redshift due to load error. s3_uri=#{s3_uri}"), :error => e.to_s
+           return false # for debug
+         end
+         raise e
+       end
+       true # for debug
+     end
+
+     protected
+
+     def format_log(message)
+       (@log_suffix and not @log_suffix.empty?) ? "#{message} #{@log_suffix}" : message
+     end
+
+     private
+
+     def json?
+       @file_type == 'json'
+     end
+
+     def msgpack?
+       @file_type == 'msgpack'
+     end
+
+     def create_gz_file_from_flat_data(dst_file, chunk)
+       gzw = nil
+       begin
+         gzw = Zlib::GzipWriter.new(dst_file)
+         chunk.write_to(gzw)
+       ensure
+         gzw.close rescue nil if gzw
+       end
+       dst_file
+     end
+
+     def create_gz_file_from_structured_data(dst_file, chunk, delimiter)
+       # fetch the table definition from redshift
+       redshift_table_columns = @redshift_connection.fetch_table_columns(@redshift_tablename, @redshift_schemaname)
+       if redshift_table_columns == nil
+         raise "failed to fetch the redshift table definition."
+       elsif redshift_table_columns.empty?
+         $log.warn format_log("no table on redshift. table_name=#{@table_name_with_schema}")
+         return nil
+       end
+
+       # convert json to tsv format text
+       gzw = nil
+       begin
+         gzw = Zlib::GzipWriter.new(dst_file)
+         chunk.msgpack_each do |record|
+           next unless record
+           begin
+             hash = json? ? json_to_hash(record[@record_log_tag]) : record[@record_log_tag]
+             tsv_text = hash_to_table_text(redshift_table_columns, hash, delimiter)
+             gzw.write(tsv_text) if tsv_text and not tsv_text.empty?
+           rescue => e
+             $log.error format_log("failed to create table text from #{@file_type}. text=(#{record[@record_log_tag]})"), :error => e.to_s
+             $log.error_backtrace
+           end
+         end
+         return nil unless gzw.pos > 0
+       ensure
+         gzw.close rescue nil if gzw
+       end
+       dst_file
+     end
+
+     def determine_delimiter(file_type)
+       case file_type
+       when 'json', 'msgpack', 'tsv'
+         "\t"
+       when "csv"
+         ','
+       else
+         raise Fluent::ConfigError, "Invalid file_type:#{file_type}."
+       end
+     end
+
+     def json_to_hash(json_text)
+       return nil if json_text.to_s.empty?
+
+       MultiJson.load(json_text)
+     rescue => e
+       $log.warn format_log("failed to parse json. "), :error => e.to_s
+       nil
+     end
+
+     def hash_to_table_text(redshift_table_columns, hash, delimiter)
+       return "" unless hash
+
+       # extract values from hash
+       val_list = redshift_table_columns.collect {|cn| hash[cn]}
+
+       if val_list.all?{|v| v.nil?}
+         $log.warn format_log("no data match for table columns on redshift. data=#{hash} table_columns=#{redshift_table_columns}")
+         return ""
+       end
+
+       generate_line_with_delimiter(val_list, delimiter)
+     end
+
+     def generate_line_with_delimiter(val_list, delimiter)
+       val_list.collect do |val|
+         case val
+         when nil
+           NULL_CHAR_FOR_COPY
+         when ''
+           ''
+         when Hash, Array
+           escape_text_for_copy(MultiJson.dump(val))
+         else
+           escape_text_for_copy(val.to_s)
+         end
+       end.join(delimiter) + "\n"
+     end
+
+     def escape_text_for_copy(val)
+       val.gsub(/\\|\t|\n/, {"\\" => "\\\\", "\t" => "\\\t", "\n" => "\\\n"}) # escape tab, newline and backslash
+     end
+
+     def create_s3path(bucket, path)
+       timestamp_key = (@utc) ? Time.now.utc.strftime(@timestamp_key_format) : Time.now.strftime(@timestamp_key_format)
+       i = 0
+       begin
+         suffix = "_#{'%02d' % i}"
+         s3path = "#{path}#{timestamp_key}#{suffix}.gz"
+         i += 1
+       end while bucket.objects[s3path].exists?
+       s3path
+     end
+
+     class RedshiftError < StandardError
+       def initialize(msg)
+         case msg
+         when PG::Error
+           @pg_error = msg
+           super(msg.to_s)
+           set_backtrace(msg.backtrace)
+         else
+           super
+         end
+       end
+
+       attr_accessor :pg_error
+     end
+
+     class RedshiftConnection
+       REDSHIFT_CONNECT_TIMEOUT = 10.0 # 10sec
+
+       def initialize(db_conf)
+         @db_conf = db_conf
+         @connection = nil
+         ObjectSpace.define_finalizer(self) {
+           close()
+         }
+       end
+
+       attr_reader :db_conf
+
+       def fetch_table_columns(table_name, schema_name)
+         columns = nil
+         exec(fetch_columns_sql(table_name, schema_name)) do |result|
+           columns = result.collect{|row| row['column_name']}
+         end
+         columns
+       end
+
+       def exec(sql, &block)
+         conn = @connection
+         conn = create_redshift_connection if conn.nil?
+         if block
+           conn.exec(sql) {|result| block.call(result)}
+         else
+           conn.exec(sql)
+         end
+       rescue PG::Error => e
+         raise RedshiftError.new(e)
+       ensure
+         conn.close if conn && @connection.nil?
+       end
+
+       def connect_start
+         @connection = create_redshift_connection
+       end
+
+       def close
+         @connection.close rescue nil if @connection
+         @connection = nil
+       end
+
+       private
+
+       def create_redshift_connection
+         hostaddr = IPSocket.getaddress(db_conf[:host])
+         db_conf[:hostaddr] = hostaddr
+
+         conn = PG::Connection.connect_start(db_conf)
+         raise RedshiftError.new("Unable to create a new connection.") unless conn
+         if conn.status == PG::CONNECTION_BAD
+           raise RedshiftError.new("Connection failed: %s" % [ conn.error_message ])
+         end
+
+         socket = conn.socket_io
+         poll_status = PG::PGRES_POLLING_WRITING
+         until poll_status == PG::PGRES_POLLING_OK || poll_status == PG::PGRES_POLLING_FAILED
+           case poll_status
+           when PG::PGRES_POLLING_READING
+             IO.select([socket], nil, nil, REDSHIFT_CONNECT_TIMEOUT) or
+               raise RedshiftError.new("Asynchronous connection timed out!(READING)")
+           when PG::PGRES_POLLING_WRITING
+             IO.select(nil, [socket], nil, REDSHIFT_CONNECT_TIMEOUT) or
+               raise RedshiftError.new("Asynchronous connection timed out!(WRITING)")
+           end
+           poll_status = conn.connect_poll
+         end
+
+         unless conn.status == PG::CONNECTION_OK
+           raise RedshiftError, ("Connect failed: %s" % [conn.error_message.to_s.lines.uniq.join(" ")])
+         end
+
+         conn
+       rescue => e
+         conn.close rescue nil if conn
+         raise RedshiftError.new(e) if e.kind_of?(PG::Error)
+         raise e
+       end
+
+       def fetch_columns_sql(table_name, schema_name = nil)
+         sql = "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'"
+         sql << " and table_schema = '#{schema_name}'" if schema_name
+         sql << " order by ordinal_position;"
+         sql
+       end
+     end
+
+     class MaintenanceError < StandardError
+     end
+
+     class MaintenanceMonitor
+       def initialize(maintenance_file_path)
+         @file_path = maintenance_file_path
+       end
+
+       def in_maintenance?
+         !!(@file_path && File.exists?(@file_path))
+       end
+
+       def check_maintenance!
+         if in_maintenance?
+           raise MaintenanceError.new("Service is in maintenance mode - maintenance_file_path:#{@file_path}")
+         end
+       end
+     end
+   end
+
+
+ end
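
A minimal standalone sketch (not part of the gem) of how `generate_line_with_delimiter` and `escape_text_for_copy` above turn a record into a COPY-ready line; the column names and record below are hypothetical:

```ruby
NULL_CHAR_FOR_COPY = "\\N"

# Escape backslash, tab and newline so Redshift COPY with the ESCAPE option
# can reparse each field.
def escape_text_for_copy(val)
  val.gsub(/\\|\t|\n/, "\\" => "\\\\", "\t" => "\\\t", "\n" => "\\\n")
end

# Pick the record's values in table-column order; missing columns become \N
# (SQL NULL). Hash/Array values are JSON-dumped in the plugin; this sketch
# only covers plain values.
def to_copy_line(columns, record, delimiter = "\t")
  columns.map { |c|
    v = record[c]
    v.nil? ? NULL_CHAR_FOR_COPY : escape_text_for_copy(v.to_s)
  }.join(delimiter) + "\n"
end

p to_copy_line(%w[key_a key_b], { "key_a" => "val_a" })
# => "val_a\t\\N\n"
```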
data/test/plugin/test_out_redshift.rb ADDED
@@ -0,0 +1,503 @@
+ require 'test_helper'
+
+ require 'fluent/test'
+ require 'fluent/plugin/out_redshift-out'
+ require 'flexmock/test_unit'
+ require 'zlib'
+
+
+ class RedshiftOutputTest < Test::Unit::TestCase
+   def setup
+     require 'aws-sdk-v1'
+     require 'pg'
+     require 'csv'
+     Fluent::Test.setup
+     PG::Error.module_eval { attr_accessor :result }
+   end
+
+   MAINTENANCE_FILE_PATH_FOR_TEST = "/tmp/fluentd_redshift_plugin_test_maintenance"
+
+   CONFIG_BASE = %[
+     aws_key_id test_key_id
+     aws_sec_key test_sec_key
+     s3_bucket test_bucket
+     path log
+     redshift_host test_host
+     redshift_dbname test_db
+     redshift_user test_user
+     redshift_password test_password
+     redshift_tablename test_table
+     buffer_type memory
+     utc
+     log_suffix id:5 host:localhost
+     maintenance_file_path #{MAINTENANCE_FILE_PATH_FOR_TEST}
+   ]
+   CONFIG_CSV = %[
+     #{CONFIG_BASE}
+     file_type csv
+   ]
+   CONFIG_TSV = %[
+     #{CONFIG_BASE}
+     file_type tsv
+   ]
+   CONFIG_JSON = %[
+     #{CONFIG_BASE}
+     file_type json
+   ]
+   CONFIG_JSON_WITH_SCHEMA = %[
+     #{CONFIG_BASE}
+     redshift_schemaname test_schema
+     file_type json
+   ]
+   CONFIG_MSGPACK = %[
+     #{CONFIG_BASE}
+     file_type msgpack
+   ]
+   CONFIG_PIPE_DELIMITER = %[
+     #{CONFIG_BASE}
+     delimiter |
+   ]
+   CONFIG_PIPE_DELIMITER_WITH_NAME = %[
+     #{CONFIG_BASE}
+     file_type pipe
+     delimiter |
+   ]
+   CONFIG = CONFIG_CSV
+
+   RECORD_CSV_A = {"log" => %[val_a,val_b,val_c,val_d]}
+   RECORD_CSV_B = {"log" => %[val_e,val_f,val_g,val_h]}
+   RECORD_TSV_A = {"log" => %[val_a\tval_b\tval_c\tval_d]}
+   RECORD_TSV_B = {"log" => %[val_e\tval_f\tval_g\tval_h]}
+   RECORD_JSON_A = {"log" => %[{"key_a" : "val_a", "key_b" : "val_b"}]}
+   RECORD_JSON_B = {"log" => %[{"key_c" : "val_c", "key_d" : "val_d"}]}
+   RECORD_MSGPACK_A = {"key_a" => "val_a", "key_b" => "val_b"}
+   RECORD_MSGPACK_B = {"key_c" => "val_c", "key_d" => "val_d"}
+   DEFAULT_TIME = Time.parse("2013-03-06 12:15:02 UTC").to_i
+
+   def create_driver(conf = CONFIG, tag = 'test.input')
+     Fluent::Test::BufferedOutputTestDriver.new(Fluent::RedshiftOutput, tag).configure(conf)
+   end
+
+   def create_driver_no_write(conf = CONFIG, tag = 'test.input')
+     Fluent::Test::BufferedOutputTestDriver.new(Fluent::RedshiftOutput, tag) do
+       def write(chunk)
+         chunk.read
+       end
+     end.configure(conf)
+   end
+
+   def test_configure
+     assert_raise(Fluent::ConfigError) {
+       d = create_driver('')
+     }
+     assert_raise(Fluent::ConfigError) {
+       d = create_driver(CONFIG_BASE)
+     }
+     d = create_driver(CONFIG_CSV)
+     assert_equal "test_key_id", d.instance.aws_key_id
+     assert_equal "test_sec_key", d.instance.aws_sec_key
+     assert_equal "test_bucket", d.instance.s3_bucket
+     assert_equal "log/", d.instance.path
+     assert_equal "test_host", d.instance.redshift_host
+     assert_equal 5439, d.instance.redshift_port
+     assert_equal "test_db", d.instance.redshift_dbname
+     assert_equal "test_user", d.instance.redshift_user
+     assert_equal "test_password", d.instance.redshift_password
+     assert_equal "test_table", d.instance.redshift_tablename
+     assert_equal nil, d.instance.redshift_schemaname
+     assert_equal "FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS", d.instance.redshift_copy_base_options
+     assert_equal nil, d.instance.redshift_copy_options
+     assert_equal "csv", d.instance.file_type
+     assert_equal ",", d.instance.delimiter
+     assert_equal true, d.instance.utc
+     assert_equal MAINTENANCE_FILE_PATH_FOR_TEST, d.instance.maintenance_file_path
+   end
+   def test_configure_with_schemaname
+     d = create_driver(CONFIG_JSON_WITH_SCHEMA)
+     assert_equal "test_schema", d.instance.redshift_schemaname
+   end
+   def test_configure_localtime
+     d = create_driver(CONFIG_CSV.gsub(/ *utc */, ''))
+     assert_equal false, d.instance.utc
+   end
+   def test_configure_no_path
+     d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, ''))
+     assert_equal "", d.instance.path
+   end
+   def test_configure_root_path
+     d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path /'))
+     assert_equal "", d.instance.path
+   end
+   def test_configure_path_with_slash
+     d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path log/'))
+     assert_equal "log/", d.instance.path
+   end
+   def test_configure_path_starts_with_slash
+     d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path /log/'))
+     assert_equal "log/", d.instance.path
+   end
+   def test_configure_path_starts_with_slash_without_last_slash
+     d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path /log'))
+     assert_equal "log/", d.instance.path
+   end
+   def test_configure_tsv
+     d1 = create_driver(CONFIG_TSV)
+     assert_equal "tsv", d1.instance.file_type
+     assert_equal "\t", d1.instance.delimiter
+   end
+   def test_configure_json
+     d2 = create_driver(CONFIG_JSON)
+     assert_equal "json", d2.instance.file_type
+     assert_equal "\t", d2.instance.delimiter
+   end
+   def test_configure_msgpack
+     d2 = create_driver(CONFIG_MSGPACK)
+     assert_equal "msgpack", d2.instance.file_type
+     assert_equal "\t", d2.instance.delimiter
+   end
+   def test_configure_original_file_type
+     d3 = create_driver(CONFIG_PIPE_DELIMITER)
+     assert_equal nil, d3.instance.file_type
+     assert_equal "|", d3.instance.delimiter
+
+     d4 = create_driver(CONFIG_PIPE_DELIMITER_WITH_NAME)
+     assert_equal "pipe", d4.instance.file_type
+     assert_equal "|", d4.instance.delimiter
+   end
+   def test_configure_no_log_suffix
+     d = create_driver(CONFIG_CSV.gsub(/ *log_suffix *.+$/, ''))
+     assert_equal "", d.instance.log_suffix
+   end
+
+   def emit_csv(d)
+     d.emit(RECORD_CSV_A, DEFAULT_TIME)
+     d.emit(RECORD_CSV_B, DEFAULT_TIME)
+   end
+   def emit_tsv(d)
+     d.emit(RECORD_TSV_A, DEFAULT_TIME)
+     d.emit(RECORD_TSV_B, DEFAULT_TIME)
+   end
+   def emit_json(d)
+     d.emit(RECORD_JSON_A, DEFAULT_TIME)
+     d.emit(RECORD_JSON_B, DEFAULT_TIME)
+   end
+   def emit_msgpack(d)
+     d.emit(RECORD_MSGPACK_A, DEFAULT_TIME)
+     d.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
+   end
+
+   def test_format_csv
+     setup_mocks("")
+     d_csv = create_driver_no_write(CONFIG_CSV)
+     emit_csv(d_csv)
+     d_csv.expect_format RECORD_CSV_A['log'] + "\n"
+     d_csv.expect_format RECORD_CSV_B['log'] + "\n"
+     d_csv.run
+   end
+   def test_format_tsv
+     setup_mocks("")
+     d_tsv = create_driver_no_write(CONFIG_TSV)
+     emit_tsv(d_tsv)
+     d_tsv.expect_format RECORD_TSV_A['log'] + "\n"
+     d_tsv.expect_format RECORD_TSV_B['log'] + "\n"
+     d_tsv.run
+   end
+   def test_format_json
+     setup_mocks("")
+     d_json = create_driver_no_write(CONFIG_JSON)
+     emit_json(d_json)
+     d_json.expect_format RECORD_JSON_A.to_msgpack
+     d_json.expect_format RECORD_JSON_B.to_msgpack
+     d_json.run
+   end
+
+   def test_format_msgpack
+     setup_mocks("")
+     d_msgpack = create_driver_no_write(CONFIG_MSGPACK)
+     emit_msgpack(d_msgpack)
+     d_msgpack.expect_format({ 'log' => RECORD_MSGPACK_A }.to_msgpack)
+     d_msgpack.expect_format({ 'log' => RECORD_MSGPACK_B }.to_msgpack)
+     d_msgpack.run
+   end
+
+   def setup_redshift_connection_mock(options = {})
+     options ||= {}
+     column_names = options[:column_names] || ['key_a', 'key_b', 'key_c', 'key_d', 'key_e', 'key_f', 'key_g', 'key_h']
+     schema_name = options[:schema_name]
+     table_name = options[:table_name] || 'test_table'
+     exec_sql_proc = options[:exec_sql_proc]
+
+     column_list_query_regex =
+       if schema_name
+         /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}' and table_schema = '#{schema_name}'/
+       else
+         /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'/
+       end
+     copy_query_regex =
+       if schema_name
+         /\Acopy #{schema_name}.#{table_name} from/
+       else
+         /\Acopy #{table_name} from/
+       end
+
+     flexmock(Fluent::RedshiftOutput::RedshiftConnection).new_instances do |conn|
+       conn.should_receive(:exec).and_return do |sql, block|
+         if exec_sql_proc
+           exec_sql_proc.call(sql, block)
+         elsif block
+           if sql =~ column_list_query_regex
+             block.call column_names.collect{|key| {'column_name' => key}}
+           else
+             block.call []
+           end
+         else
+           unless sql =~ copy_query_regex
+             error = PG::Error.new("ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details.")
+             error.result = "ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details."
+             raise Fluent::RedshiftOutput::RedshiftError.new(error)
+           end
+         end
+       end
+       conn.should_receive(:connect_start)
+     end
+   end
+
+   def setup_s3_mock(expected_data)
+     current_time = Time.now
+
+     # create mock of s3 object
+     s3obj = flexmock(AWS::S3::S3Object)
+     s3obj.should_receive(:exists?).with_any_args.and_return { false }
+     s3obj.should_receive(:write).with(
+       # pathname
+       on { |pathname|
+         data = nil
+         pathname.open { |f|
+           gz = Zlib::GzipReader.new(f)
+           data = gz.read
+           gz.close
+         }
+         assert_equal expected_data, data
+       },
+       :acl => :bucket_owner_full_control
+     ).and_return { true }
+
+     # create mock of s3 object collection
+     s3obj_col = flexmock(AWS::S3::ObjectCollection)
+     s3obj_col.should_receive(:[]).with(
+       on { |key|
+         expected_key = current_time.utc.strftime("log/year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M_00.gz")
+         key == expected_key
+       }).
+       and_return {
+         s3obj
+       }
+
+     # create mock of s3 bucket
+     flexmock(AWS::S3::Bucket).new_instances do |bucket|
+       bucket.should_receive(:objects).with_any_args.
+         and_return {
+           s3obj_col
+         }
+     end
+   end
+
+   def setup_tempfile_mock_to_be_closed
+     flexmock(Tempfile).new_instances.should_receive(:close!).at_least.once
+   end
+
+   def setup_mocks(expected_data, options = {})
+     setup_redshift_connection_mock(options)
+     setup_s3_mock(expected_data)
+   end
+
+   def test_write_with_csv
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+     setup_tempfile_mock_to_be_closed
+     d_csv = create_driver
+     emit_csv(d_csv)
+     assert_equal true, d_csv.run
+   end
+
+   def test_write_with_json
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
+     setup_tempfile_mock_to_be_closed
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_equal true, d_json.run
+   end
+
+   def test_write_with_json_hash_value
+     setup_mocks("val_a\t{\"foo\":\"var\"}\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : {"foo" : "var"}}]}, DEFAULT_TIME)
+     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+     assert_equal true, d_json.run
+   end
+
+   def test_write_with_json_array_value
+     setup_mocks("val_a\t[\"foo\",\"var\"]\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ["foo", "var"]}]}, DEFAULT_TIME)
+     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+     assert_equal true, d_json.run
+   end
+
+   def test_write_with_json_including_tab_newline_quote
+     setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[{"key_a" : "val_a_with_\\t_tab_\\n_newline", "key_b" : "val_b_with_\\\\_quote"}]}, DEFAULT_TIME)
+     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+     assert_equal true, d_json.run
+   end
+
+   def test_write_with_json_empty_text_value
+     setup_mocks(%[val_a\t\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ""}]}, DEFAULT_TIME)
+     assert_equal true, d_json.run
+   end
+
+   def test_write_with_json_no_data
+     setup_mocks("")
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit("", DEFAULT_TIME)
+     d_json.emit("", DEFAULT_TIME)
+     assert_equal false, d_json.run
+   end
+
+   def test_write_with_json_invalid_one_line
+     setup_mocks(%[\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[}}]}, DEFAULT_TIME)
+     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+     assert_equal true, d_json.run
+   end
+
+   def test_write_with_json_no_available_data
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit(RECORD_JSON_A, DEFAULT_TIME)
+     d_json.emit({"log" => %[{"key_o" : "val_o", "key_p" : "val_p"}]}, DEFAULT_TIME)
+     assert_equal true, d_json.run
+   end
+
+   def test_write_with_msgpack
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
+     d_msgpack = create_driver(CONFIG_MSGPACK)
+     emit_msgpack(d_msgpack)
+     assert_equal true, d_msgpack.run
+   end
+
+   def test_write_with_msgpack_hash_value
+     setup_mocks("val_a\t{\"foo\":\"var\"}\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
+     d_msgpack = create_driver(CONFIG_MSGPACK)
+     d_msgpack.emit({"key_a" => "val_a", "key_b" => {"foo" => "var"}}, DEFAULT_TIME)
+     d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
+     assert_equal true, d_msgpack.run
+   end
+
+   def test_write_with_msgpack_array_value
+     setup_mocks("val_a\t[\"foo\",\"var\"]\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
+     d_msgpack = create_driver(CONFIG_MSGPACK)
+     d_msgpack.emit({"key_a" => "val_a", "key_b" => ["foo", "var"]}, DEFAULT_TIME)
+     d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
+     assert_equal true, d_msgpack.run
+   end
+
+   def test_write_with_msgpack_including_tab_newline_quote
+     setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
+     d_msgpack = create_driver(CONFIG_MSGPACK)
+     d_msgpack.emit({"key_a" => "val_a_with_\t_tab_\n_newline", "key_b" => "val_b_with_\\_quote"}, DEFAULT_TIME)
+     d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
+     assert_equal true, d_msgpack.run
+   end
+
+   def test_write_with_msgpack_no_data
+     setup_mocks("")
+     d_msgpack = create_driver(CONFIG_MSGPACK)
+     d_msgpack.emit({}, DEFAULT_TIME)
+     d_msgpack.emit({}, DEFAULT_TIME)
+     assert_equal false, d_msgpack.run
+   end
+
+   def test_write_with_msgpack_no_available_data
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
+     d_msgpack = create_driver(CONFIG_MSGPACK)
+     d_msgpack.emit(RECORD_MSGPACK_A, DEFAULT_TIME)
+     d_msgpack.emit({"key_o" => "val_o", "key_p" => "val_p"}, DEFAULT_TIME)
+     assert_equal true, d_msgpack.run
+   end
+
+   def test_write_redshift_connection_error
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                 exec_sql_proc: Proc.new {|sql, block| raise Fluent::RedshiftOutput::RedshiftError, "redshift connection error" })
+     d_csv = create_driver
+     emit_csv(d_csv)
+     assert_raise(Fluent::RedshiftOutput::RedshiftError) {
+       d_csv.run
+     }
+   end
+
+   def test_write_redshift_load_error
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                 exec_sql_proc: Proc.new {|sql, block|
+                   msg = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
+                   raise Fluent::RedshiftOutput::RedshiftError.new(msg)
+                 })
+
+     d_csv = create_driver
+     emit_csv(d_csv)
+     assert_equal false, d_csv.run
+   end
+
+   def test_write_with_json_redshift_connection_error
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                 exec_sql_proc: Proc.new {|sql, block| raise Fluent::RedshiftOutput::RedshiftError.new("redshift connection error")})
+
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_raise(Fluent::RedshiftOutput::RedshiftError) {
+       d_json.run
+     }
+   end
+
+   def test_write_with_json_no_table_on_redshift
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                 exec_sql_proc: Proc.new {|sql, block| block.call [] if block })
+
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_equal false, d_json.run
+   end
+
+   def test_write_with_json_failed_to_get_columns
+     setup_mocks("", exec_sql_proc: Proc.new {|sql, block| nil})
+
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_raise(RuntimeError, "failed to fetch the redshift table definition.") {
+       d_json.run
+     }
+   end
+
+   def test_write_with_json_fetch_column_with_schema
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n],
+                 schema_name: 'test_schema')
+     d_json = create_driver(CONFIG_JSON_WITH_SCHEMA)
+     emit_json(d_json)
+     assert_equal true, d_json.run
+   end
+
+   def test_maintenance_mode
+     setup_mocks("")
+     flexmock(File).should_receive(:exists?).with(MAINTENANCE_FILE_PATH_FOR_TEST).and_return(true)
+
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_raise(Fluent::RedshiftOutput::MaintenanceError,
+                  "Service is in maintenance mode - maintenance_file_path:#{MAINTENANCE_FILE_PATH_FOR_TEST}") {
+       d_json.run
+     }
+   end
+ end
data/test/test_helper.rb ADDED
@@ -0,0 +1,8 @@
+ if ENV['COVERAGE']
+   require 'simplecov'
+   SimpleCov.start do
+     add_filter 'test/'
+     add_filter 'pkg/'
+     add_filter 'vendor/'
+   end
+ end
metadata ADDED
@@ -0,0 +1,171 @@
+ --- !ruby/object:Gem::Specification
+ name: fluent-plugin-redshift-out2
+ version: !ruby/object:Gem::Version
+   version: 0.4.1
+ platform: ruby
+ authors:
+ - Ertugrul Yilmaz
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2020-07-09 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: fluentd
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.10.0
+     - - "<"
+       - !ruby/object:Gem::Version
+         version: '2'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.10.0
+     - - "<"
+       - !ruby/object:Gem::Version
+         version: '2'
+ - !ruby/object:Gem::Dependency
+   name: aws-sdk-v1
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 1.6.3
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 1.6.3
+ - !ruby/object:Gem::Dependency
+   name: multi_json
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.10'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.10'
+ - !ruby/object:Gem::Dependency
+   name: yajl-ruby
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.2'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.2'
+ - !ruby/object:Gem::Dependency
+   name: pg
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 0.17.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 0.17.0
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: simplecov
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.5.4
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.5.4
+ - !ruby/object:Gem::Dependency
+   name: flexmock
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 1.3.1
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 1.3.1
+ description: Amazon Redshift output plugin for Fluentd
+ email:
+ - "***.***.***@gmail.com"
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - ".gitignore"
+ - Gemfile
+ - README.md
+ - Rakefile
+ - VERSION
+ - fluent-plugin-redshift-out.gemspec
+ - lib/fluent/plugin/out_redshift-out.rb
+ - test/plugin/test_out_redshift.rb
+ - test/test_helper.rb
+ homepage: https://github.com/ertugrulyilmaz/fluent-plugin-redshift-out
+ licenses: []
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubygems_version: 3.0.3
+ signing_key:
+ specification_version: 4
+ summary: Amazon Redshift output plugin for Fluentd
+ test_files:
+ - test/plugin/test_out_redshift.rb
+ - test/test_helper.rb