fluent-plugin-redshift-out2 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/Gemfile +3 -0
- data/README.md +156 -0
- data/Rakefile +16 -0
- data/VERSION +1 -0
- data/fluent-plugin-redshift-out.gemspec +27 -0
- data/lib/fluent/plugin/out_redshift-out.rb +397 -0
- data/test/plugin/test_out_redshift.rb +503 -0
- data/test/test_helper.rb +8 -0
- metadata +171 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA256:
  metadata.gz: 1041bb98af3d4cd15dbe1d4886a31b68bc2c1691771247b589373792429608a7
  data.tar.gz: d8162bac6c674de9760c16c4f2f6ce381c13d6d3a9b52ea7fa170fba11d21cb0
SHA512:
  metadata.gz: a28646dd88bc2b290bc38ae8860e49f7f841c0a4c24a78f0df98b5c41d2f82919ba5dafe25b5b6d3d16ce66b4d7515792fe506406940f026afe209027e4c94ef
  data.tar.gz: 3a421e6727ee9f020f8a6a257ed3d966c611d003daa17ed88d288c210a497d3e9317f5fa49dd80b2339577e0b089280e0a2b578404c3e8007a98a503136bb611
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,156 @@
Amazon Redshift output plugin for Fluentd
========

## Overview

Amazon Redshift output plugin uploads event logs to an Amazon Redshift cluster. Supported data formats are csv, tsv, json and msgpack. An S3 bucket and a Redshift cluster are required to use this plugin.

## Installation

    gem install fluent-plugin-redshift-out

## Configuration

Format:

    <match my.tag>
      type redshift-out

      # s3 (for copying data to redshift)
      aws_key_id YOUR_AWS_KEY_ID
      aws_sec_key YOUR_AWS_SECRET_KEY
      s3_bucket YOUR_S3_BUCKET
      s3_endpoint YOUR_S3_BUCKET_END_POINT
      path YOUR_S3_PATH
      timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M

      # redshift
      redshift_host YOUR_AMAZON_REDSHIFT_CLUSTER_END_POINT
      redshift_port YOUR_AMAZON_REDSHIFT_CLUSTER_PORT
      redshift_dbname YOUR_AMAZON_REDSHIFT_CLUSTER_DATABASE_NAME
      redshift_user YOUR_AMAZON_REDSHIFT_CLUSTER_USER_NAME
      redshift_password YOUR_AMAZON_REDSHIFT_CLUSTER_PASSWORD
      redshift_schemaname YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_SCHEMA_NAME
      redshift_tablename YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_TABLE_NAME
      file_type [tsv|csv|json|msgpack]

      # buffer
      buffer_type file
      buffer_path /var/log/fluent/redshift
      flush_interval 15m
      buffer_chunk_limit 1g
    </match>

Example (watch and upload json formatted apache log):

    <source>
      type tail
      path redshift_test.json
      pos_file redshift_test_json.pos
      tag redshift.json
      format /^(?<log>.*)$/
    </source>

    <match redshift.json>
      type redshift-out

      # s3 (for copying data to redshift)
      aws_key_id YOUR_AWS_KEY_ID
      aws_sec_key YOUR_AWS_SECRET_KEY
      s3_bucket hapyrus-example
      s3_endpoint s3.amazonaws.com
      path path/on/s3/apache_json_log/
      timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M

      # redshift
      redshift_host xxx-yyy-zzz.xxxxxxxxxx.us-east-1.redshift.amazonaws.com
      redshift_port 5439
      redshift_dbname fluent-redshift-test
      redshift_user fluent
      redshift_password fluent-password
      redshift_tablename apache_log
      file_type json

      # buffer
      buffer_type file
      buffer_path /var/log/fluent/redshift
      flush_interval 15m
      buffer_chunk_limit 1g
    </match>

+ `type` (required) : The value must be `redshift-out`.

+ `aws_key_id` : AWS access key id to access the s3 bucket.

+ `aws_sec_key` : AWS secret access key to access the s3 bucket.

+ `s3_bucket` (required) : s3 bucket name. The S3 bucket must be in the same region as your Redshift cluster.

+ `s3_endpoint` : s3 endpoint.

+ `path` (required) : s3 key prefix for the uploaded data files (used as the COPY source path).

+ `timestamp_key_format` : The format of the object keys. It can include date-format directives.

  - The default is "year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M".
  - For example, with the example configuration above the s3 paths look like the following:
<pre>
hapyrus-example/apache_json_log/year=2013/month=03/day=05/hour=12/20130305_1215_00.gz
hapyrus-example/apache_json_log/year=2013/month=03/day=05/hour=12/20130305_1230_00.gz
</pre>

+ `redshift_host` (required) : the endpoint (or hostname) of your Amazon Redshift cluster.

+ `redshift_port` (required) : port number.

+ `redshift_dbname` (required) : database name.

+ `redshift_user` (required) : user name.

+ `redshift_password` (required) : password for the user name.

+ `redshift_tablename` (required) : table name to store data.

+ `redshift_schemaname` : schema name of the target table. By default this option is not set, and the table is looked up without a schema, following your search_path.

+ `redshift_connect_timeout` : maximum time to wait for the connection to succeed.

+ `file_type` : file format of the source data. `csv`, `tsv`, `msgpack` or `json` are available.

+ `delimiter` : delimiter of the source data. This option is ignored if `file_type` is specified.

+ `buffer_type` : buffer type.

+ `buffer_path` : path prefix of the files used to buffer logs.

+ `flush_interval` : flush interval.

+ `buffer_chunk_limit` : maximum size of each buffer chunk.

+ `utc` : use the UTC time zone. This parameter affects `timestamp_key_format` (see the sketch after this list).

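For reference, the object key of each uploaded file is built by applying `strftime` to `timestamp_key_format` (in UTC when `utc` is set), prefixing the result with `path`, and appending a two-digit suffix and `.gz`. The snippet below is a minimal sketch of that derivation, using the values from the example configuration above; it is illustration only and not part of the plugin (the real `create_s3path` also increments the suffix while the key already exists on S3).

```ruby
# Values taken from the example configuration above (assumptions for illustration only).
path                 = 'path/on/s3/apache_json_log/'
timestamp_key_format = 'year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M'
utc                  = true

now           = utc ? Time.now.utc : Time.now
timestamp_key = now.strftime(timestamp_key_format)
suffix        = '_%02d' % 0  # the plugin bumps this index until the S3 key is unused
puts "#{path}#{timestamp_key}#{suffix}.gz"
# => e.g. path/on/s3/apache_json_log/year=2013/month=03/day=05/hour=12/20130305-1215_00.gz
```
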
## Logging examples

```ruby
# examples by fluent-logger
require 'fluent-logger'
log = Fluent::Logger::FluentLogger.new(nil, :host => 'localhost', :port => 24224)

# file_type: csv
log.post('your.tag', :log => "12345,12345")

# file_type: tsv
log.post('your.tag', :log => "12345\t12345")

# file_type: json
require 'json'
log.post('your.tag', :log => { :user_id => 12345, :data_id => 12345 }.to_json)

# file_type: msgpack
log.post('your.tag', :user_id => 12345, :data_id => 12345)
```

## License

Copyright (c) 2013 [Hapyrus Inc](http://hapyrus.com)

[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
require "bundler"
Bundler::GemHelper.install_tasks
require 'rake/testtask'

Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.test_files = FileList['test/plugin/*.rb']
  test.verbose = true
end

task :coverage do |t|
  ENV['COVERAGE'] = '1'
  Rake::Task["test"].invoke
end

task :default => [:build]
data/VERSION
ADDED
@@ -0,0 +1 @@
0.4.1
data/fluent-plugin-redshift-out.gemspec
ADDED
@@ -0,0 +1,27 @@
# -*- encoding: utf-8 -*-
$:.push File.expand_path('../lib', __FILE__)

Gem::Specification.new do |gem|
  gem.name = "fluent-plugin-redshift-out2"
  gem.version = File.read("VERSION").strip
  gem.authors = ["Ertugrul Yilmaz"]
  gem.email = ["***.***.***@gmail.com"]
  gem.description = %q{Amazon Redshift output plugin for Fluentd}
  gem.summary = gem.description
  gem.homepage = "https://github.com/ertugrulyilmaz/fluent-plugin-redshift-out"
  gem.has_rdoc = false

  gem.files = `git ls-files`.split($/)
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency "fluentd", [">= 0.10.0", "< 2"]
  gem.add_dependency "aws-sdk-v1", ">= 1.6.3"
  gem.add_dependency "multi_json", "~> 1.10"
  gem.add_dependency "yajl-ruby", "~> 1.2"
  gem.add_dependency "pg", "~> 0.17.0"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "simplecov", ">= 0.5.4"
  gem.add_development_dependency "flexmock", ">= 1.3.1"
end
data/lib/fluent/plugin/out_redshift-out.rb
ADDED
@@ -0,0 +1,397 @@
module Fluent

  class RedshiftOutput < BufferedOutput
    Fluent::Plugin.register_output('redshift-out', self)

    NULL_CHAR_FOR_COPY = "\\N"

    # ignore load table error. (invalid data format)
    IGNORE_REDSHIFT_ERROR_REGEXP = /^ERROR: Load into table '[^']+' failed\./

    def initialize
      super
      require 'aws-sdk-v1'
      require 'zlib'
      require 'time'
      require 'tempfile'
      require 'pg'
      require 'csv'
      require 'multi_json'
      require 'yajl'
      ::MultiJson.use(:yajl)
    end

    config_param :record_log_tag, :string, :default => 'log'
    # s3
    config_param :aws_key_id, :string, :secret => true, :default => nil
    config_param :aws_sec_key, :string, :secret => true, :default => nil
    config_param :s3_bucket, :string
    config_param :s3_region, :string, :default => nil
    config_param :path, :string, :default => ""
    config_param :timestamp_key_format, :string, :default => 'year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M'
    config_param :utc, :bool, :default => false
    # redshift
    config_param :redshift_host, :string
    config_param :redshift_port, :integer, :default => 5439
    config_param :redshift_dbname, :string
    config_param :redshift_user, :string
    config_param :redshift_password, :string, :secret => true
    config_param :redshift_tablename, :string
    config_param :redshift_schemaname, :string, :default => nil
    config_param :redshift_copy_base_options, :string, :default => "FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS"
    config_param :redshift_copy_options, :string, :default => nil
    config_param :redshift_connect_timeout, :integer, :default => 10
    # file format
    config_param :file_type, :string, :default => nil # json, tsv, csv, msgpack
    config_param :delimiter, :string, :default => nil
    # maintenance
    config_param :maintenance_file_path, :string, :default => nil
    # for debug
    config_param :log_suffix, :string, :default => ''

    def configure(conf)
      super
      @path = "#{@path}/" unless @path.end_with?('/') # append last slash
      @path = @path[1..-1] if @path.start_with?('/')  # remove head slash
      @utc = true if conf['utc']
      @db_conf = {
        host: @redshift_host,
        port: @redshift_port,
        dbname: @redshift_dbname,
        user: @redshift_user,
        password: @redshift_password,
        connect_timeout: @redshift_connect_timeout
      }
      @delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
      $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
      @table_name_with_schema = [@redshift_schemaname, @redshift_tablename].compact.join('.')
      @maintenance_monitor = MaintenanceMonitor.new(@maintenance_file_path)
    end

    def start
      super
      # init s3 conf
      options = {}
      if @aws_key_id && @aws_sec_key
        options[:access_key_id] = @aws_key_id
        options[:secret_access_key] = @aws_sec_key
      end
      options[:region] = @s3_region if @s3_region
      @s3 = AWS::S3.new(options)
      @bucket = @s3.buckets[@s3_bucket]
      @redshift_connection = RedshiftConnection.new(@db_conf)
      @redshift_connection.connect_start
    end

    def format(tag, time, record)
      if json?
        record.to_msgpack
      elsif msgpack?
        { @record_log_tag => record }.to_msgpack
      else
        "#{record[@record_log_tag]}\n"
      end
    end

    def write(chunk)
      $log.debug format_log("start creating gz.")
      @maintenance_monitor.check_maintenance!

      # create a gz file
      tmp = Tempfile.new("s3-")
      tmp =
        if json? || msgpack?
          create_gz_file_from_structured_data(tmp, chunk, @delimiter)
        else
          create_gz_file_from_flat_data(tmp, chunk)
        end

      # no data -> skip
      unless tmp
        $log.debug format_log("received no valid data. ")
        return false # for debug
      end

      # create a file path with time format
      s3path = create_s3path(@bucket, @path)

      # upload gz to s3
      @bucket.objects[s3path].write(Pathname.new(tmp.path),
                                    :acl => :bucket_owner_full_control)

      # close temp file
      tmp.close!

      # copy gz on s3 to redshift
      s3_uri = "s3://#{@s3_bucket}/#{s3path}"
      credentials = @s3.client.credential_provider.credentials
      sql = "copy #{@table_name_with_schema} from '#{s3_uri}'"
      sql += " CREDENTIALS 'aws_access_key_id=#{credentials[:access_key_id]};aws_secret_access_key=#{credentials[:secret_access_key]}"
      sql += ";token=#{credentials[:session_token]}" if credentials[:session_token]
      sql += "' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"

      $log.debug format_log("start copying. s3_uri=#{s3_uri}")

      begin
        @redshift_connection.exec(sql)
        $log.info format_log("completed copying to redshift. s3_uri=#{s3_uri}")
      rescue RedshiftError => e
        if e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
          $log.error format_log("failed to copy data into redshift due to load error. s3_uri=#{s3_uri}"), :error=>e.to_s
          return false # for debug
        end
        raise e
      end
      true # for debug
    end

    protected

    def format_log(message)
      (@log_suffix and not @log_suffix.empty?) ? "#{message} #{@log_suffix}" : message
    end

    private

    def json?
      @file_type == 'json'
    end

    def msgpack?
      @file_type == 'msgpack'
    end

    def create_gz_file_from_flat_data(dst_file, chunk)
      gzw = nil
      begin
        gzw = Zlib::GzipWriter.new(dst_file)
        chunk.write_to(gzw)
      ensure
        gzw.close rescue nil if gzw
      end
      dst_file
    end

    def create_gz_file_from_structured_data(dst_file, chunk, delimiter)
      # fetch the table definition from redshift
      redshift_table_columns = @redshift_connection.fetch_table_columns(@redshift_tablename, @redshift_schemaname)
      if redshift_table_columns == nil
        raise "failed to fetch the redshift table definition."
      elsif redshift_table_columns.empty?
        $log.warn format_log("no table on redshift. table_name=#{@table_name_with_schema}")
        return nil
      end

      # convert json to tsv format text
      gzw = nil
      begin
        gzw = Zlib::GzipWriter.new(dst_file)
        chunk.msgpack_each do |record|
          next unless record
          begin
            hash = json? ? json_to_hash(record[@record_log_tag]) : record[@record_log_tag]
            tsv_text = hash_to_table_text(redshift_table_columns, hash, delimiter)
            gzw.write(tsv_text) if tsv_text and not tsv_text.empty?
          rescue => e
            $log.error format_log("failed to create table text from #{@file_type}. text=(#{record[@record_log_tag]})"), :error=>e.to_s
            $log.error_backtrace
          end
        end
        return nil unless gzw.pos > 0
      ensure
        gzw.close rescue nil if gzw
      end
      dst_file
    end

    def determine_delimiter(file_type)
      case file_type
      when 'json', 'msgpack', 'tsv'
        "\t"
      when "csv"
        ','
      else
        raise Fluent::ConfigError, "Invalid file_type:#{file_type}."
      end
    end

    def json_to_hash(json_text)
      return nil if json_text.to_s.empty?

      MultiJson.load(json_text)
    rescue => e
      $log.warn format_log("failed to parse json. "), :error => e.to_s
      nil
    end

    def hash_to_table_text(redshift_table_columns, hash, delimiter)
      return "" unless hash

      # extract values from hash
      val_list = redshift_table_columns.collect {|cn| hash[cn]}

      if val_list.all?{|v| v.nil?}
        $log.warn format_log("no data match for table columns on redshift. data=#{hash} table_columns=#{redshift_table_columns}")
        return ""
      end

      generate_line_with_delimiter(val_list, delimiter)
    end

    def generate_line_with_delimiter(val_list, delimiter)
      val_list.collect do |val|
        case val
        when nil
          NULL_CHAR_FOR_COPY
        when ''
          ''
        when Hash, Array
          escape_text_for_copy(MultiJson.dump(val))
        else
          escape_text_for_copy(val.to_s)
        end
      end.join(delimiter) + "\n"
    end

    def escape_text_for_copy(val)
      val.gsub(/\\|\t|\n/, {"\\" => "\\\\", "\t" => "\\\t", "\n" => "\\\n"}) # escape tab, newline and backslash
    end

    def create_s3path(bucket, path)
      timestamp_key = (@utc) ? Time.now.utc.strftime(@timestamp_key_format) : Time.now.strftime(@timestamp_key_format)
      i = 0
      begin
        suffix = "_#{'%02d' % i}"
        s3path = "#{path}#{timestamp_key}#{suffix}.gz"
        i += 1
      end while bucket.objects[s3path].exists?
      s3path
    end

    class RedshiftError < StandardError
      def initialize(msg)
        case msg
        when PG::Error
          @pg_error = msg
          super(msg.to_s)
          set_backtrace(msg.backtrace)
        else
          super
        end
      end

      attr_accessor :pg_error
    end

    class RedshiftConnection
      REDSHIFT_CONNECT_TIMEOUT = 10.0 # 10sec

      def initialize(db_conf)
        @db_conf = db_conf
        @connection = nil
        ObjectSpace.define_finalizer(self) {
          close()
        }
      end

      attr_reader :db_conf

      def fetch_table_columns(table_name, schema_name)
        columns = nil
        exec(fetch_columns_sql(table_name, schema_name)) do |result|
          columns = result.collect{|row| row['column_name']}
        end
        columns
      end

      def exec(sql, &block)
        conn = @connection
        conn = create_redshift_connection if conn.nil?
        if block
          conn.exec(sql) {|result| block.call(result)}
        else
          conn.exec(sql)
        end
      rescue PG::Error => e
        raise RedshiftError.new(e)
      ensure
        conn.close if conn && @connection.nil?
      end

      def connect_start
        @connection = create_redshift_connection
      end

      def close
        @connection.close rescue nil if @connection
        @connection = nil
      end

      private

      def create_redshift_connection
        hostaddr = IPSocket.getaddress(db_conf[:host])
        db_conf[:hostaddr] = hostaddr

        conn = PG::Connection.connect_start(db_conf)
        raise RedshiftError.new("Unable to create a new connection.") unless conn
        if conn.status == PG::CONNECTION_BAD
          raise RedshiftError.new("Connection failed: %s" % [ conn.error_message ])
        end

        socket = conn.socket_io
        poll_status = PG::PGRES_POLLING_WRITING
        until poll_status == PG::PGRES_POLLING_OK || poll_status == PG::PGRES_POLLING_FAILED
          case poll_status
          when PG::PGRES_POLLING_READING
            IO.select([socket], nil, nil, REDSHIFT_CONNECT_TIMEOUT) or
              raise RedshiftError.new("Asynchronous connection timed out!(READING)")
          when PG::PGRES_POLLING_WRITING
            IO.select(nil, [socket], nil, REDSHIFT_CONNECT_TIMEOUT) or
              raise RedshiftError.new("Asynchronous connection timed out!(WRITING)")
          end
          poll_status = conn.connect_poll
        end

        unless conn.status == PG::CONNECTION_OK
          raise RedshiftError, ("Connect failed: %s" % [conn.error_message.to_s.lines.uniq.join(" ")])
        end

        conn
      rescue => e
        conn.close rescue nil if conn
        raise RedshiftError.new(e) if e.kind_of?(PG::Error)
        raise e
      end

      def fetch_columns_sql(table_name, schema_name = nil)
        sql = "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'"
        sql << " and table_schema = '#{schema_name}'" if schema_name
        sql << " order by ordinal_position;"
        sql
      end
    end

    class MaintenanceError < StandardError
    end

    class MaintenanceMonitor
      def initialize(maintenance_file_path)
        @file_path = maintenance_file_path
      end

      def in_maintenance?
        !!(@file_path && File.exists?(@file_path))
      end

      def check_maintenance!
        if in_maintenance?
          raise MaintenanceError.new("Service is in maintenance mode - maintenance_file_path:#{@file_path}")
        end
      end
    end
  end

end
data/test/plugin/test_out_redshift.rb
ADDED
@@ -0,0 +1,503 @@
require 'test_helper'

require 'fluent/test'
require 'fluent/plugin/out_redshift-out'
require 'flexmock/test_unit'
require 'zlib'


class RedshiftOutputTest < Test::Unit::TestCase
  def setup
    require 'aws-sdk-v1'
    require 'pg'
    require 'csv'
    Fluent::Test.setup
    PG::Error.module_eval { attr_accessor :result }
  end

  MAINTENANCE_FILE_PATH_FOR_TEST = "/tmp/fluentd_redshift_plugin_test_maintenance"

  CONFIG_BASE = %[
    aws_key_id test_key_id
    aws_sec_key test_sec_key
    s3_bucket test_bucket
    path log
    redshift_host test_host
    redshift_dbname test_db
    redshift_user test_user
    redshift_password test_password
    redshift_tablename test_table
    buffer_type memory
    utc
    log_suffix id:5 host:localhost
    maintenance_file_path #{MAINTENANCE_FILE_PATH_FOR_TEST}
  ]
  CONFIG_CSV = %[
    #{CONFIG_BASE}
    file_type csv
  ]
  CONFIG_TSV = %[
    #{CONFIG_BASE}
    file_type tsv
  ]
  CONFIG_JSON = %[
    #{CONFIG_BASE}
    file_type json
  ]
  CONFIG_JSON_WITH_SCHEMA = %[
    #{CONFIG_BASE}
    redshift_schemaname test_schema
    file_type json
  ]
  CONFIG_MSGPACK = %[
    #{CONFIG_BASE}
    file_type msgpack
  ]
  CONFIG_PIPE_DELIMITER = %[
    #{CONFIG_BASE}
    delimiter |
  ]
  CONFIG_PIPE_DELIMITER_WITH_NAME = %[
    #{CONFIG_BASE}
    file_type pipe
    delimiter |
  ]
  CONFIG = CONFIG_CSV

  RECORD_CSV_A = {"log" => %[val_a,val_b,val_c,val_d]}
  RECORD_CSV_B = {"log" => %[val_e,val_f,val_g,val_h]}
  RECORD_TSV_A = {"log" => %[val_a\tval_b\tval_c\tval_d]}
  RECORD_TSV_B = {"log" => %[val_e\tval_f\tval_g\tval_h]}
  RECORD_JSON_A = {"log" => %[{"key_a" : "val_a", "key_b" : "val_b"}]}
  RECORD_JSON_B = {"log" => %[{"key_c" : "val_c", "key_d" : "val_d"}]}
  RECORD_MSGPACK_A = {"key_a" => "val_a", "key_b" => "val_b"}
  RECORD_MSGPACK_B = {"key_c" => "val_c", "key_d" => "val_d"}
  DEFAULT_TIME = Time.parse("2013-03-06 12:15:02 UTC").to_i

  def create_driver(conf = CONFIG, tag = 'test.input')
    Fluent::Test::BufferedOutputTestDriver.new(Fluent::RedshiftOutput, tag).configure(conf)
  end

  def create_driver_no_write(conf = CONFIG, tag = 'test.input')
    Fluent::Test::BufferedOutputTestDriver.new(Fluent::RedshiftOutput, tag) do
      def write(chunk)
        chunk.read
      end
    end.configure(conf)
  end

  def test_configure
    assert_raise(Fluent::ConfigError) {
      d = create_driver('')
    }
    assert_raise(Fluent::ConfigError) {
      d = create_driver(CONFIG_BASE)
    }
    d = create_driver(CONFIG_CSV)
    assert_equal "test_key_id", d.instance.aws_key_id
    assert_equal "test_sec_key", d.instance.aws_sec_key
    assert_equal "test_bucket", d.instance.s3_bucket
    assert_equal "log/", d.instance.path
    assert_equal "test_host", d.instance.redshift_host
    assert_equal 5439, d.instance.redshift_port
    assert_equal "test_db", d.instance.redshift_dbname
    assert_equal "test_user", d.instance.redshift_user
    assert_equal "test_password", d.instance.redshift_password
    assert_equal "test_table", d.instance.redshift_tablename
    assert_equal nil, d.instance.redshift_schemaname
    assert_equal "FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS", d.instance.redshift_copy_base_options
    assert_equal nil, d.instance.redshift_copy_options
    assert_equal "csv", d.instance.file_type
    assert_equal ",", d.instance.delimiter
    assert_equal true, d.instance.utc
    assert_equal MAINTENANCE_FILE_PATH_FOR_TEST, d.instance.maintenance_file_path
  end
  def test_configure_with_schemaname
    d = create_driver(CONFIG_JSON_WITH_SCHEMA)
    assert_equal "test_schema", d.instance.redshift_schemaname
  end
  def test_configure_localtime
    d = create_driver(CONFIG_CSV.gsub(/ *utc */, ''))
    assert_equal false, d.instance.utc
  end
  def test_configure_no_path
    d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, ''))
    assert_equal "", d.instance.path
  end
  def test_configure_root_path
    d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path /'))
    assert_equal "", d.instance.path
  end
  def test_configure_path_with_slash
    d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path log/'))
    assert_equal "log/", d.instance.path
  end
  def test_configure_path_starts_with_slash
    d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path /log/'))
    assert_equal "log/", d.instance.path
  end
  def test_configure_path_starts_with_slash_without_last_slash
    d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path /log'))
    assert_equal "log/", d.instance.path
  end
  def test_configure_tsv
    d1 = create_driver(CONFIG_TSV)
    assert_equal "tsv", d1.instance.file_type
    assert_equal "\t", d1.instance.delimiter
  end
  def test_configure_json
    d2 = create_driver(CONFIG_JSON)
    assert_equal "json", d2.instance.file_type
    assert_equal "\t", d2.instance.delimiter
  end
  def test_configure_msgpack
    d2 = create_driver(CONFIG_MSGPACK)
    assert_equal "msgpack", d2.instance.file_type
    assert_equal "\t", d2.instance.delimiter
  end
  def test_configure_original_file_type
    d3 = create_driver(CONFIG_PIPE_DELIMITER)
    assert_equal nil, d3.instance.file_type
    assert_equal "|", d3.instance.delimiter

    d4 = create_driver(CONFIG_PIPE_DELIMITER_WITH_NAME)
    assert_equal "pipe", d4.instance.file_type
    assert_equal "|", d4.instance.delimiter
  end
  def test_configure_no_log_suffix
    d = create_driver(CONFIG_CSV.gsub(/ *log_suffix *.+$/, ''))
    assert_equal "", d.instance.log_suffix
  end

  def emit_csv(d)
    d.emit(RECORD_CSV_A, DEFAULT_TIME)
    d.emit(RECORD_CSV_B, DEFAULT_TIME)
  end
  def emit_tsv(d)
    d.emit(RECORD_TSV_A, DEFAULT_TIME)
    d.emit(RECORD_TSV_B, DEFAULT_TIME)
  end
  def emit_json(d)
    d.emit(RECORD_JSON_A, DEFAULT_TIME)
    d.emit(RECORD_JSON_B, DEFAULT_TIME)
  end
  def emit_msgpack(d)
    d.emit(RECORD_MSGPACK_A, DEFAULT_TIME)
    d.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
  end

  def test_format_csv
    setup_mocks("")
    d_csv = create_driver_no_write(CONFIG_CSV)
    emit_csv(d_csv)
    d_csv.expect_format RECORD_CSV_A['log'] + "\n"
    d_csv.expect_format RECORD_CSV_B['log'] + "\n"
    d_csv.run
  end
  def test_format_tsv
    setup_mocks("")
    d_tsv = create_driver_no_write(CONFIG_TSV)
    emit_tsv(d_tsv)
    d_tsv.expect_format RECORD_TSV_A['log'] + "\n"
    d_tsv.expect_format RECORD_TSV_B['log'] + "\n"
    d_tsv.run
  end
  def test_format_json
    setup_mocks("")
    d_json = create_driver_no_write(CONFIG_JSON)
    emit_json(d_json)
    d_json.expect_format RECORD_JSON_A.to_msgpack
    d_json.expect_format RECORD_JSON_B.to_msgpack
    d_json.run
  end

  def test_format_msgpack
    setup_mocks("")
    d_msgpack = create_driver_no_write(CONFIG_MSGPACK)
    emit_msgpack(d_msgpack)
    d_msgpack.expect_format({ 'log' => RECORD_MSGPACK_A }.to_msgpack)
    d_msgpack.expect_format({ 'log' => RECORD_MSGPACK_B }.to_msgpack)
    d_msgpack.run
  end

  def setup_redshift_connection_mock(options = {})
    options ||= {}
    column_names = options[:column_names] || ['key_a', 'key_b', 'key_c', 'key_d', 'key_e', 'key_f', 'key_g', 'key_h']
    schema_name = options[:schema_name]
    table_name = options[:table_name] || 'test_table'
    exec_sql_proc = options[:exec_sql_proc]

    column_list_query_regex =
      if schema_name
        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}' and table_schema = '#{schema_name}'/
      else
        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'/
      end
    copy_query_regex =
      if schema_name
        /\Acopy #{schema_name}.#{table_name} from/
      else
        /\Acopy #{table_name} from/
      end

    flexmock(Fluent::RedshiftOutput::RedshiftConnection).new_instances do |conn|
      conn.should_receive(:exec).and_return do |sql, block|
        if exec_sql_proc
          exec_sql_proc.call(sql, block)
        elsif block
          if sql =~ column_list_query_regex
            block.call column_names.collect{|key| {'column_name' => key}}
          else
            block.call []
          end
        else
          unless sql =~ copy_query_regex
            error = PG::Error.new("ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details.")
            error.result = "ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details."
            raise Fluent::RedshiftOutput::RedshiftError.new(error)
          end
        end
      end
      conn.should_receive(:connect_start)
    end
  end

  def setup_s3_mock(expected_data)
    current_time = Time.now

    # create mock of s3 object
    s3obj = flexmock(AWS::S3::S3Object)
    s3obj.should_receive(:exists?).with_any_args.and_return { false }
    s3obj.should_receive(:write).with(
      # pathname
      on { |pathname|
        data = nil
        pathname.open { |f|
          gz = Zlib::GzipReader.new(f)
          data = gz.read
          gz.close
        }
        assert_equal expected_data, data
      },
      :acl => :bucket_owner_full_control
    ).and_return { true }

    # create mock of s3 object collection
    s3obj_col = flexmock(AWS::S3::ObjectCollection)
    s3obj_col.should_receive(:[]).with(
      on { |key|
        expected_key = current_time.utc.strftime("log/year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M_00.gz")
        key == expected_key
      }).
      and_return {
        s3obj
      }

    # create mock of s3 bucket
    flexmock(AWS::S3::Bucket).new_instances do |bucket|
      bucket.should_receive(:objects).with_any_args.
        and_return {
          s3obj_col
        }
    end
  end

  def setup_tempfile_mock_to_be_closed
    flexmock(Tempfile).new_instances.should_receive(:close!).at_least.once
  end

  def setup_mocks(expected_data, options = {})
    setup_redshift_connection_mock(options)
    setup_s3_mock(expected_data)
  end

  def test_write_with_csv
    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
    setup_tempfile_mock_to_be_closed
    d_csv = create_driver
    emit_csv(d_csv)
    assert_equal true, d_csv.run
  end

  def test_write_with_json
    setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
    setup_tempfile_mock_to_be_closed
    d_json = create_driver(CONFIG_JSON)
    emit_json(d_json)
    assert_equal true, d_json.run
  end

  def test_write_with_json_hash_value
    setup_mocks("val_a\t{\"foo\":\"var\"}\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
    d_json = create_driver(CONFIG_JSON)
    d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : {"foo" : "var"}}]} , DEFAULT_TIME)
    d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
    assert_equal true, d_json.run
  end

  def test_write_with_json_array_value
    setup_mocks("val_a\t[\"foo\",\"var\"]\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
    d_json = create_driver(CONFIG_JSON)
    d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ["foo", "var"]}]} , DEFAULT_TIME)
    d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
    assert_equal true, d_json.run
  end

  def test_write_with_json_including_tab_newline_quote
    setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
    d_json = create_driver(CONFIG_JSON)
    d_json.emit({"log" => %[{"key_a" : "val_a_with_\\t_tab_\\n_newline", "key_b" : "val_b_with_\\\\_quote"}]} , DEFAULT_TIME)
    d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
    assert_equal true, d_json.run
  end

  def test_write_with_json_empty_text_value
    setup_mocks(%[val_a\t\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
    d_json = create_driver(CONFIG_JSON)
    d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ""}]} , DEFAULT_TIME)
    assert_equal true, d_json.run
  end

  def test_write_with_json_no_data
    setup_mocks("")
    d_json = create_driver(CONFIG_JSON)
    d_json.emit("", DEFAULT_TIME)
    d_json.emit("", DEFAULT_TIME)
    assert_equal false, d_json.run
  end

  def test_write_with_json_invalid_one_line
    setup_mocks(%[\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
    d_json = create_driver(CONFIG_JSON)
    d_json.emit({"log" => %[}}]}, DEFAULT_TIME)
    d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
    assert_equal true, d_json.run
  end

  def test_write_with_json_no_available_data
    setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
    d_json = create_driver(CONFIG_JSON)
    d_json.emit(RECORD_JSON_A, DEFAULT_TIME)
    d_json.emit({"log" => %[{"key_o" : "val_o", "key_p" : "val_p"}]}, DEFAULT_TIME)
    assert_equal true, d_json.run
  end

  def test_write_with_msgpack
    setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
    d_msgpack = create_driver(CONFIG_MSGPACK)
    emit_msgpack(d_msgpack)
    assert_equal true, d_msgpack.run
  end

  def test_write_with_msgpack_hash_value
    setup_mocks("val_a\t{\"foo\":\"var\"}\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
    d_msgpack = create_driver(CONFIG_MSGPACK)
    d_msgpack.emit({"key_a" => "val_a", "key_b" => {"foo" => "var"}} , DEFAULT_TIME)
    d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
    assert_equal true, d_msgpack.run
  end

  def test_write_with_msgpack_array_value
    setup_mocks("val_a\t[\"foo\",\"var\"]\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
    d_msgpack = create_driver(CONFIG_MSGPACK)
    d_msgpack.emit({"key_a" => "val_a", "key_b" => ["foo", "var"]} , DEFAULT_TIME)
    d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
    assert_equal true, d_msgpack.run
  end

  def test_write_with_msgpack_including_tab_newline_quote
    setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
    d_msgpack = create_driver(CONFIG_MSGPACK)
    d_msgpack.emit({"key_a" => "val_a_with_\t_tab_\n_newline", "key_b" => "val_b_with_\\_quote"} , DEFAULT_TIME)
    d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
    assert_equal true, d_msgpack.run
  end

  def test_write_with_msgpack_no_data
    setup_mocks("")
    d_msgpack = create_driver(CONFIG_MSGPACK)
    d_msgpack.emit({}, DEFAULT_TIME)
    d_msgpack.emit({}, DEFAULT_TIME)
    assert_equal false, d_msgpack.run
  end

  def test_write_with_msgpack_no_available_data
    setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
    d_msgpack = create_driver(CONFIG_MSGPACK)
    d_msgpack.emit(RECORD_MSGPACK_A, DEFAULT_TIME)
    d_msgpack.emit({"key_o" => "val_o", "key_p" => "val_p"}, DEFAULT_TIME)
    assert_equal true, d_msgpack.run
  end

  def test_write_redshift_connection_error
    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
                exec_sql_proc: Proc.new {|sql, block| raise Fluent::RedshiftOutput::RedshiftError, "redshift connection error" })
    d_csv = create_driver
    emit_csv(d_csv)
    assert_raise(Fluent::RedshiftOutput::RedshiftError) {
      d_csv.run
    }
  end

  def test_write_redshift_load_error
    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
                exec_sql_proc: Proc.new {|sql, block|
                  msg = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
                  raise Fluent::RedshiftOutput::RedshiftError.new(msg)
                })

    d_csv = create_driver
    emit_csv(d_csv)
    assert_equal false, d_csv.run
  end

  def test_write_with_json_redshift_connection_error
    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
                exec_sql_proc: Proc.new {|sql, block| raise Fluent::RedshiftOutput::RedshiftError.new("redshift connection error")})

    d_json = create_driver(CONFIG_JSON)
    emit_json(d_json)
    assert_raise(Fluent::RedshiftOutput::RedshiftError) {
      d_json.run
    }
  end

  def test_write_with_json_no_table_on_redshift
    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
                exec_sql_proc: Proc.new {|sql, block| block.call [] if block })

    d_json = create_driver(CONFIG_JSON)
    emit_json(d_json)
    assert_equal false, d_json.run
  end

  def test_write_with_json_failed_to_get_columns
    setup_mocks("", exec_sql_proc: Proc.new {|sql, block| nil})

    d_json = create_driver(CONFIG_JSON)
    emit_json(d_json)
    assert_raise(RuntimeError, "failed to fetch the redshift table definition.") {
      d_json.run
    }
  end

  def test_write_with_json_fetch_column_with_schema
    setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n],
                schema_name: 'test_schema')
    d_json = create_driver(CONFIG_JSON_WITH_SCHEMA)
    emit_json(d_json)
    assert_equal true, d_json.run
  end

  def test_maintenance_mode
    setup_mocks("")
    flexmock(File).should_receive(:exists?).with(MAINTENANCE_FILE_PATH_FOR_TEST).and_return(true)

    d_json = create_driver(CONFIG_JSON)
    emit_json(d_json)
    assert_raise(Fluent::RedshiftOutput::MaintenanceError,
                 "Service is in maintenance mode - maintenance_file_path:#{MAINTENANCE_FILE_PATH_FOR_TEST}") {
      d_json.run
    }
  end
end
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,171 @@
--- !ruby/object:Gem::Specification
name: fluent-plugin-redshift-out2
version: !ruby/object:Gem::Version
  version: 0.4.1
platform: ruby
authors:
- Ertugrul Yilmaz
autorequire:
bindir: bin
cert_chain: []
date: 2020-07-09 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: fluentd
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 0.10.0
    - - "<"
      - !ruby/object:Gem::Version
        version: '2'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 0.10.0
    - - "<"
      - !ruby/object:Gem::Version
        version: '2'
- !ruby/object:Gem::Dependency
  name: aws-sdk-v1
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 1.6.3
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 1.6.3
- !ruby/object:Gem::Dependency
  name: multi_json
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.10'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.10'
- !ruby/object:Gem::Dependency
  name: yajl-ruby
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.2'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.2'
- !ruby/object:Gem::Dependency
  name: pg
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: 0.17.0
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: 0.17.0
- !ruby/object:Gem::Dependency
  name: rake
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: simplecov
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 0.5.4
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 0.5.4
- !ruby/object:Gem::Dependency
  name: flexmock
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 1.3.1
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 1.3.1
description: Amazon Redshift output plugin for Fluentd
email:
- "***.***.***@gmail.com"
executables: []
extensions: []
extra_rdoc_files: []
files:
- ".gitignore"
- Gemfile
- README.md
- Rakefile
- VERSION
- fluent-plugin-redshift-out.gemspec
- lib/fluent/plugin/out_redshift-out.rb
- test/plugin/test_out_redshift.rb
- test/test_helper.rb
homepage: https://github.com/ertugrulyilmaz/fluent-plugin-redshift-out
licenses: []
metadata: {}
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubygems_version: 3.0.3
signing_key:
specification_version: 4
summary: Amazon Redshift output plugin for Fluentd
test_files:
- test/plugin/test_out_redshift.rb
- test/test_helper.rb