fluent-plugin-redshift 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +19 -0
- data/Gemfile +3 -0
- data/README.md +131 -0
- data/Rakefile +16 -0
- data/VERSION +1 -0
- data/fluent-plugin-redshift.gemspec +25 -0
- data/lib/fluent/plugin/out_redshift.rb +230 -0
- data/test/plugin/test_out_redshift.rb +395 -0
- data/test/test_helper.rb +8 -0
- metadata +152 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,131 @@
Amazon Redshift output plugin for Fluentd
========

## Overview

The Amazon Redshift output plugin uploads event logs to an Amazon Redshift cluster. Supported data formats are csv, tsv and json. An S3 bucket and a Redshift cluster are required to use this plugin.

## Installation

    gem install fluent-plugin-redshift

## Configuration

Format:

    <match my.tag>
      type redshift

      # s3 (for copying data to redshift)
      aws_key_id YOUR_AWS_KEY_ID
      aws_sec_key YOUR_AWS_SECRET_KEY
      s3_bucket YOUR_S3_BUCKET
      s3_endpoint YOUR_S3_BUCKET_END_POINT
      path YOUR_S3_PATH
      timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M

      # redshift
      redshift_host YOUR_AMAZON_REDSHIFT_CLUSTER_END_POINT
      redshift_port YOUR_AMAZON_REDSHIFT_CLUSTER_PORT
      redshift_dbname YOUR_AMAZON_REDSHIFT_CLUSTER_DATABASE_NAME
      redshift_user YOUR_AMAZON_REDSHIFT_CLUSTER_USER_NAME
      redshift_password YOUR_AMAZON_REDSHIFT_CLUSTER_PASSWORD
      redshift_tablename YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_TABLE_NAME
      file_type [tsv|csv|json]

      # buffer
      buffer_type file
      buffer_path /var/log/fluent/redshift
      flush_interval 15m
      buffer_chunk_limit 1g
    </match>

Example (watch and upload json formatted apache log):

    <source>
      type tail
      path redshift_test.json
      pos_file redshift_test_json.pos
      tag redshift.json
      format /^(?<log>.*)$/
    </source>

    <match redshift.json>
      type redshift

      # s3 (for copying data to redshift)
      aws_key_id YOUR_AWS_KEY_ID
      aws_sec_key YOUR_AWS_SECRET_KEY
      s3_bucket hapyrus-example
      s3_endpoint s3.amazonaws.com
      path apache_json_log
      timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M

      # redshift
      redshift_host xxx-yyy-zzz.xxxxxxxxxx.us-east-1.redshift.amazonaws.com
      redshift_port 5439
      redshift_dbname fluent-redshift-test
      redshift_user fluent
      redshift_password fluent-password
      redshift_tablename apache_log
      file_type json

      # buffer
      buffer_type file
      buffer_path /var/log/fluent/redshift
      flush_interval 15m
      buffer_chunk_limit 1g
    </match>

+ `type` (required) : The value must be `redshift`.

+ `aws_key_id` (required) : AWS access key id to access the s3 bucket.

+ `aws_sec_key` (required) : AWS secret key to access the s3 bucket.

+ `s3_bucket` (required) : s3 bucket name. The S3 bucket must be in the same region as your Redshift cluster.

+ `s3_endpoint` : s3 endpoint.

+ `path` (required) : s3 path to input.

+ `timestamp_key_format` : The format of the object keys. It can include date-format directives.

  - The default is "year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M".
  - For example, the s3 path looks like the following with the above example configuration.
    <pre>
    hapyrus-example/apache_json_log/year=2013/month=03/day=05/hour=12/20130305-1215_00.gz
    hapyrus-example/apache_json_log/year=2013/month=03/day=05/hour=12/20130305-1230_00.gz
    </pre>

+ `redshift_host` (required) : the endpoint (or host name) of your Amazon Redshift cluster.

+ `redshift_port` (required) : port number.

+ `redshift_dbname` (required) : database name.

+ `redshift_user` (required) : user name.

+ `redshift_password` (required) : password for the user name.

+ `redshift_tablename` (required) : table name to store data.

+ `file_type` : file format of the source data. `csv`, `tsv` or `json` are available.

+ `delimiter` : delimiter of the source data. This option is ignored if `file_type` is specified.

+ `buffer_type` : buffer type.

+ `buffer_path` : path prefix of the files used to buffer logs.

+ `flush_interval` : flush interval.

+ `buffer_chunk_limit` : size limit of each buffer chunk.

+ `utc` : use the UTC time zone. This parameter affects `timestamp_key_format`.

## License

Copyright (c) 2013 [Hapyrus Inc](http://hapyrus.com)

[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
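For reference, the COPY statement the plugin issues after each buffer flush can be sketched from the `copy_sql_template` built in `lib/fluent/plugin/out_redshift.rb` below; the table name, S3 URI, credentials and delimiter here are placeholder values, not part of the package:

    # Ruby sketch of the generated SQL; all values below are hypothetical examples.
    table     = "apache_log"
    s3_uri    = "s3://hapyrus-example/apache_json_log/year=2013/month=03/day=05/hour=12/20130305-1215_00.gz"
    key_id    = "YOUR_AWS_KEY_ID"
    sec_key   = "YOUR_AWS_SECRET_KEY"
    delimiter = "\t"   # json and tsv use a tab delimiter; csv uses a comma

    copy_sql = "copy #{table} from '#{s3_uri}' " +
               "CREDENTIALS 'aws_access_key_id=#{key_id};aws_secret_access_key=#{sec_key}' " +
               "delimiter '#{delimiter}' REMOVEQUOTES GZIP;"
    puts copy_sql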
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
require "bundler"
Bundler::GemHelper.install_tasks
require 'rake/testtask'

Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.test_files = FileList['test/plugin/*.rb']
  test.verbose = true
end

task :coverage do |t|
  ENV['COVERAGE'] = '1'
  Rake::Task["test"].invoke
end

task :default => [:build]
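With this Rakefile, the suite under `test/plugin` runs with `rake test`; `rake coverage` sets `ENV['COVERAGE']` to `'1'` before invoking the same test task (presumably so the test helper can enable SimpleCov, a development dependency of the gem), and the default task builds the gem through Bundler::GemHelper.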
data/VERSION
ADDED
@@ -0,0 +1 @@
0.0.1
data/fluent-plugin-redshift.gemspec
ADDED
@@ -0,0 +1,25 @@
# -*- encoding: utf-8 -*-
$:.push File.expand_path('../lib', __FILE__)

Gem::Specification.new do |gem|
  gem.name          = "fluent-plugin-redshift"
  gem.version       = File.read("VERSION").strip
  gem.authors       = ["Masashi Miyazaki"]
  gem.email         = ["mmasashi@gmail.com"]
  gem.description   = %q{Amazon Redshift output plugin for Fluentd}
  gem.summary       = gem.description
  gem.homepage      = "https://github.com/hapyrus/fluent-plugin-redshift"
  gem.has_rdoc      = false

  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency "fluentd", "~> 0.10.0"
  gem.add_dependency "aws-sdk", ">= 1.6.3"
  gem.add_dependency "pg", "~> 0.14.0"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "simplecov", ">= 0.5.4"
  gem.add_development_dependency "flexmock", ">= 1.3.1"
end
data/lib/fluent/plugin/out_redshift.rb
ADDED
@@ -0,0 +1,230 @@
module Fluent


  class RedshiftOutput < BufferedOutput
    Fluent::Plugin.register_output('redshift', self)

    def initialize
      super
      require 'aws-sdk'
      require 'zlib'
      require 'time'
      require 'tempfile'
      require 'pg'
      require 'json'
      require 'csv'
    end

    config_param :record_log_tag, :string, :default => 'log'
    # s3
    config_param :aws_key_id, :string
    config_param :aws_sec_key, :string
    config_param :s3_bucket, :string
    config_param :s3_endpoint, :string, :default => nil
    config_param :path, :string, :default => ""
    config_param :timestamp_key_format, :string, :default => 'year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M'
    config_param :utc, :bool, :default => false
    # redshift
    config_param :redshift_host, :string
    config_param :redshift_port, :integer, :default => 5439
    config_param :redshift_dbname, :string
    config_param :redshift_user, :string
    config_param :redshift_password, :string
    config_param :redshift_tablename, :string
    # file format
    config_param :file_type, :string, :default => nil # json, tsv, csv
    config_param :delimiter, :string, :default => nil

    def configure(conf)
      super
      @path = "#{@path}/" if /.+[^\/]$/ =~ @path
      @path = "" if @path == "/"
      @utc = true if conf['utc']
      @db_conf = {
        host:@redshift_host,
        port:@redshift_port,
        dbname:@redshift_dbname,
        user:@redshift_user,
        password:@redshift_password
      }
      @delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
      $log.debug "redshift file_type:#{@file_type} delimiter:'#{@delimiter}'"
      @copy_sql_template = "copy #{@redshift_tablename} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' REMOVEQUOTES GZIP;"
    end

    def start
      super
      # init s3 conf
      options = {
        :access_key_id     => @aws_key_id,
        :secret_access_key => @aws_sec_key
      }
      options[:s3_endpoint] = @s3_endpoint if @s3_endpoint
      @s3 = AWS::S3.new(options)
      @bucket = @s3.buckets[@s3_bucket]
    end

    def format(tag, time, record)
      (json?) ? record.to_msgpack : "#{record[@record_log_tag]}\n"
    end

    def write(chunk)
      # create a gz file
      tmp = Tempfile.new("s3-")
      tmp = (json?) ? create_gz_file_from_json(tmp, chunk, @delimiter)
                    : create_gz_file_from_msgpack(tmp, chunk)

      # no data -> skip
      unless tmp
        $log.debug "received no valid data. "
        return
      end

      # create a file path with time format
      s3path = create_s3path(@bucket, @path)

      # upload gz to s3
      @bucket.objects[s3path].write(Pathname.new(tmp.path),
                                    :acl => :bucket_owner_full_control)
      # copy gz on s3 to redshift
      s3_uri = "s3://#{@s3_bucket}/#{s3path}"
      sql = @copy_sql_template % [s3_uri, @aws_sec_key]
      $log.debug "start copying. s3_uri=#{s3_uri}"
      conn = nil
      begin
        conn = PG.connect(@db_conf)
        conn.exec(sql)
        $log.info "completed copying to redshift. s3_uri=#{s3_uri}"
      rescue PG::Error => e
        $log.error "failed to copy data into redshift. sql=#{s3_uri}", :error=>e.to_s
        raise e if e.result.nil? # retry if connection errors
      ensure
        conn.close rescue nil if conn
      end
    end

    private
    def json?
      @file_type == 'json'
    end

    def create_gz_file_from_msgpack(dst_file, chunk)
      gzw = nil
      begin
        gzw = Zlib::GzipWriter.new(dst_file)
        chunk.write_to(gzw)
      ensure
        gzw.close rescue nil if gzw
      end
      dst_file
    end

    def create_gz_file_from_json(dst_file, chunk, delimiter)
      # fetch the table definition from redshift
      redshift_table_columns = fetch_table_columns
      if redshift_table_columns == nil
        raise "failed to fetch the redshift table definition."
      elsif redshift_table_columns.empty?
        $log.warn "no table on redshift. table_name=#{@redshift_tablename}"
        return nil
      end

      # convert json to tsv format text
      table_texts = ""
      chunk.msgpack_each do |record|
        begin
          table_texts << json_to_table_text(redshift_table_columns, record[@record_log_tag], delimiter)
        rescue => e
          $log.error "failed to create table text from json. text=(#{record[@record_log_tag]})", :error=>$!.to_s
          $log.error_backtrace
        end
      end
      return nil if table_texts.empty?

      # create gz
      gzw = nil
      begin
        gzw = Zlib::GzipWriter.new(dst_file)
        gzw.write(table_texts)
      ensure
        gzw.close rescue nil if gzw
      end
      dst_file
    end

    def determine_delimiter(file_type)
      case file_type
      when 'json', 'tsv'
        "\t"
      when "csv"
        ','
      else
        raise Fluent::ConfigError, "Invalid file_type:#{file_type}."
      end
    end

    def fetch_table_columns
      fetch_columns_sql = "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{@redshift_tablename}' order by ordinal_position;"
      conn = PG.connect(@db_conf)
      begin
        columns = nil
        conn.exec(fetch_columns_sql) do |result|
          columns = result.collect{|row| row['column_name']}
        end
        columns
      ensure
        conn.close rescue nil
      end
    end

    def json_to_table_text(redshift_table_columns, json_text, delimiter)
      return "" if json_text.nil? or json_text.empty?

      # parse json text
      json_obj = nil
      begin
        json_obj = JSON.parse(json_text)
      rescue => e
        $log.warn "failed to parse json. ", :error=>e.to_s
        return ""
      end
      return "" unless json_obj

      # extract values from json
      val_list = redshift_table_columns.collect do |cn|
        val = json_obj[cn]
        val = nil unless val and not val.to_s.empty?
        val = JSON.generate(val) if val.kind_of?(Hash) or val.kind_of?(Array)
        val.to_s unless val.nil?
      end
      if val_list.all?{|v| v.nil? or v.empty?}
        $log.warn "no data match for table columns on redshift. json_text=#{json_text} table_columns=#{redshift_table_columns}"
        return ""
      end

      # generate tsv text
      begin
        CSV.generate(:col_sep=>delimiter, :quote_char => '"') do |row|
          row << val_list # include newline
        end
      rescue => e
        $log.debug "failed to generate csv val_list:#{val_list} delimiter:(#{delimiter})"
        raise e
      end
    end

    def create_s3path(bucket, path)
      timestamp_key = (@utc) ? Time.now.utc.strftime(@timestamp_key_format) : Time.now.strftime(@timestamp_key_format)
      i = 0
      begin
        suffix = "_#{'%02d' % i}"
        s3path = "#{path}#{timestamp_key}#{suffix}.gz"
        i += 1
      end while bucket.objects[s3path].exists?
      s3path
    end

  end


end
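To make the json path above concrete, the following standalone sketch (not part of the gem) mirrors what `json_to_table_text` and `determine_delimiter` do: each record's `log` field is parsed as JSON, values are picked in the order of the Redshift table's columns, nested hashes and arrays are re-serialized as JSON strings, and the row is emitted as one tab-delimited line that the COPY statement can load. The column names and record here are example values only:

    require 'json'
    require 'csv'

    # Columns as they would come back from INFORMATION_SCHEMA.COLUMNS (example values).
    columns   = ['key_a', 'key_b', 'key_c', 'key_d']
    delimiter = "\t"   # json/tsv -> tab, csv -> comma

    log_text = '{"key_a" : "val_a", "key_b" : {"foo" : "var"}}'
    json_obj = JSON.parse(log_text)

    # Missing columns become empty fields; hashes/arrays are embedded as JSON strings.
    values = columns.collect do |name|
      val = json_obj[name]
      val = JSON.generate(val) if val.kind_of?(Hash) or val.kind_of?(Array)
      val.nil? ? nil : val.to_s
    end

    row = CSV.generate(:col_sep => delimiter, :quote_char => '"') { |csv| csv << values }
    print row   # => val_a<TAB>"{""foo"":""var""}"<TAB><TAB> plus a trailing newline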
data/test/plugin/test_out_redshift.rb
ADDED
@@ -0,0 +1,395 @@
require 'test_helper'

require 'fluent/test'
require 'fluent/plugin/out_redshift'
require 'flexmock/test_unit'
require 'zlib'


class RedshiftOutputTest < Test::Unit::TestCase
  def setup
    require 'aws-sdk'
    require 'pg'
    require 'csv'
    Fluent::Test.setup
  end

  CONFIG_BASE= %[
    aws_key_id test_key_id
    aws_sec_key test_sec_key
    s3_bucket test_bucket
    path log
    redshift_host test_host
    redshift_dbname test_db
    redshift_user test_user
    redshift_password test_password
    redshift_tablename test_table
    buffer_type memory
    utc
  ]
  CONFIG_CSV= %[
    #{CONFIG_BASE}
    file_type csv
  ]
  CONFIG_TSV= %[
    #{CONFIG_BASE}
    file_type tsv
  ]
  CONFIG_JSON = %[
    #{CONFIG_BASE}
    file_type json
  ]
  CONFIG_PIPE_DELIMITER= %[
    #{CONFIG_BASE}
    delimiter |
  ]
  CONFIG_PIPE_DELIMITER_WITH_NAME= %[
    #{CONFIG_BASE}
    file_type pipe
    delimiter |
  ]
  CONFIG=CONFIG_CSV

  RECORD_CSV_A = {"log" => %[val_a,val_b,val_c,val_d]}
  RECORD_CSV_B = {"log" => %[val_e,val_f,val_g,val_h]}
  RECORD_TSV_A = {"log" => %[val_a\tval_b\tval_c\tval_d]}
  RECORD_TSV_B = {"log" => %[val_e\tval_f\tval_g\tval_h]}
  RECORD_JSON_A = {"log" => %[{"key_a" : "val_a", "key_b" : "val_b"}]}
  RECORD_JSON_B = {"log" => %[{"key_c" : "val_c", "key_d" : "val_d"}]}
  DEFAULT_TIME = Time.parse("2013-03-06 12:15:02 UTC").to_i

  def create_driver(conf = CONFIG, tag='test.input')
    Fluent::Test::BufferedOutputTestDriver.new(Fluent::RedshiftOutput, tag).configure(conf)
  end

  def create_driver_no_write(conf = CONFIG, tag='test.input')
    Fluent::Test::BufferedOutputTestDriver.new(Fluent::RedshiftOutput, tag) do
      def write(chunk)
        chunk.read
      end
    end.configure(conf)
  end

  def test_configure
    assert_raise(Fluent::ConfigError) {
      d = create_driver('')
    }
    assert_raise(Fluent::ConfigError) {
      d = create_driver(CONFIG_BASE)
    }
    d = create_driver(CONFIG_CSV)
    assert_equal "test_key_id", d.instance.aws_key_id
    assert_equal "test_sec_key", d.instance.aws_sec_key
    assert_equal "test_bucket", d.instance.s3_bucket
    assert_equal "log/", d.instance.path
    assert_equal "test_host", d.instance.redshift_host
    assert_equal 5439, d.instance.redshift_port
    assert_equal "test_db", d.instance.redshift_dbname
    assert_equal "test_user", d.instance.redshift_user
    assert_equal "test_password", d.instance.redshift_password
    assert_equal "test_table", d.instance.redshift_tablename
    assert_equal "csv", d.instance.file_type
    assert_equal ",", d.instance.delimiter
    assert_equal true, d.instance.utc
  end
  def test_configure_localtime
    d = create_driver(CONFIG_CSV.gsub(/ *utc */, ''))
    assert_equal false, d.instance.utc
  end
  def test_configure_no_path
    d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, ''))
    assert_equal "", d.instance.path
  end
  def test_configure_root_path
    d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path /'))
    assert_equal "", d.instance.path
  end
  def test_configure_path_with_slash
    d = create_driver(CONFIG_CSV.gsub(/ *path *.+$/, 'path log/'))
    assert_equal "log/", d.instance.path
  end
  def test_configure_tsv
    d1 = create_driver(CONFIG_TSV)
    assert_equal "tsv", d1.instance.file_type
    assert_equal "\t", d1.instance.delimiter
  end
  def test_configure_json
    d2 = create_driver(CONFIG_JSON)
    assert_equal "json", d2.instance.file_type
    assert_equal "\t", d2.instance.delimiter
  end
  def test_configure_original_file_type
    d3 = create_driver(CONFIG_PIPE_DELIMITER)
    assert_equal nil, d3.instance.file_type
    assert_equal "|", d3.instance.delimiter

    d4 = create_driver(CONFIG_PIPE_DELIMITER_WITH_NAME)
    assert_equal "pipe", d4.instance.file_type
    assert_equal "|", d4.instance.delimiter
  end

  def emit_csv(d)
    d.emit(RECORD_CSV_A, DEFAULT_TIME)
    d.emit(RECORD_CSV_B, DEFAULT_TIME)
  end
  def emit_tsv(d)
    d.emit(RECORD_TSV_A, DEFAULT_TIME)
    d.emit(RECORD_TSV_B, DEFAULT_TIME)
  end
  def emit_json(d)
    d.emit(RECORD_JSON_A, DEFAULT_TIME)
    d.emit(RECORD_JSON_B, DEFAULT_TIME)
  end

  def test_format_csv
    d_csv = create_driver_no_write(CONFIG_CSV)
    emit_csv(d_csv)
    d_csv.expect_format RECORD_CSV_A['log'] + "\n"
    d_csv.expect_format RECORD_CSV_B['log'] + "\n"
    d_csv.run
  end
  def test_format_tsv
    d_tsv = create_driver_no_write(CONFIG_TSV)
    emit_tsv(d_tsv)
    d_tsv.expect_format RECORD_TSV_A['log'] + "\n"
    d_tsv.expect_format RECORD_TSV_B['log'] + "\n"
    d_tsv.run
  end
  def test_format_json
    d_json = create_driver_no_write(CONFIG_JSON)
    emit_json(d_json)
    d_json.expect_format RECORD_JSON_A.to_msgpack
    d_json.expect_format RECORD_JSON_B.to_msgpack
    d_json.run
  end

  class PGConnectionMock
    def initialize(return_keys=['key_a', 'key_b', 'key_c', 'key_d', 'key_e', 'key_f', 'key_g', 'key_h'])
      @return_keys = return_keys
    end
    def exec(sql, &block)
      if block_given? and /^select column_name from/ =~ sql
        yield @return_keys.collect{|key| {'column_name' => key}}
      end
    end
    def close
    end
  end

  def setup_pg_mock
    # create mock of PG
    def PG.connect(dbinfo)
      return PGConnectionMock.new
    end
  end

  def setup_s3_mock(expected_data)
    current_time = Time.now

    # create mock of s3 object
    s3obj = flexmock(AWS::S3::S3Object)
    s3obj.should_receive(:exists?).with_any_args.and_return { false }
    s3obj.should_receive(:write).with(
      # pathname
      on { |pathname|
        data = nil
        pathname.open { |f|
          gz = Zlib::GzipReader.new(f)
          data = gz.read
          gz.close
        }
        assert_equal expected_data, data
      },
      :acl => :bucket_owner_full_control
    ).and_return { true }

    # create mock of s3 object collection
    s3obj_col = flexmock(AWS::S3::ObjectCollection)
    s3obj_col.should_receive(:[]).with(
      on { |key|
        expected_key = current_time.utc.strftime("log/year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M_00.gz")
        key == expected_key
      }).
      and_return {
        s3obj
      }

    # create mock of s3 bucket
    flexmock(AWS::S3::Bucket).new_instances do |bucket|
      bucket.should_receive(:objects).with_any_args.
        and_return {
          s3obj_col
        }
    end
  end

  def setup_mocks(expected_data)
    setup_pg_mock
    setup_s3_mock(expected_data) end

  def test_write_with_csv
    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
    d_csv = create_driver
    emit_csv(d_csv)
    d_csv.run
  end

  def test_write_with_json
    setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
    d_json = create_driver(CONFIG_JSON)
    emit_json(d_json)
    d_json.run
  end

  def test_write_with_json_hash_value
    setup_mocks("val_a\t\"{\"\"foo\"\":\"\"var\"\"}\"\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
    d_json = create_driver(CONFIG_JSON)
    d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : {"foo" : "var"}}]} , DEFAULT_TIME)
    d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
    d_json.run
  end

  def test_write_with_json_array_value
    setup_mocks("val_a\t\"[\"\"foo\"\",\"\"var\"\"]\"\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
    d_json = create_driver(CONFIG_JSON)
    d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ["foo", "var"]}]} , DEFAULT_TIME)
    d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
    d_json.run
  end

  def test_write_with_json_no_data
    setup_mocks("")
    d_json = create_driver(CONFIG_JSON)
    d_json.emit("", DEFAULT_TIME)
    d_json.emit("", DEFAULT_TIME)
    d_json.run
  end

  def test_write_with_json_invalid_one_line
    setup_mocks(%[\t\tval_c\tval_d\t\t\t\t\n])
    d_json = create_driver(CONFIG_JSON)
    d_json.emit({"log" => %[}}]}, DEFAULT_TIME)
    d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
    d_json.run
  end

  def test_write_with_json_no_available_data
    setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n])
    d_json = create_driver(CONFIG_JSON)
    d_json.emit(RECORD_JSON_A, DEFAULT_TIME)
    d_json.emit({"log" => %[{"key_o" : "val_o", "key_p" : "val_p"}]}, DEFAULT_TIME)
    d_json.run
  end

  def test_write_redshift_connection_error
    def PG.connect(dbinfo)
      return Class.new do
        def initialize(return_keys=[]); end
        def exec(sql)
          raise PG::Error, "redshift connection error"
        end
        def close; end
      end.new
    end
    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])

    d_csv = create_driver
    emit_csv(d_csv)
    assert_raise(PG::Error) {
      d_csv.run
    }
  end

  def test_write_redshift_logic_error
    PG::Error.module_eval { attr_accessor :result}
    def PG.connect(dbinfo)
      return Class.new do
        def initialize(return_keys=[]); end
        def exec(sql)
          error = PG::Error.new("redshift logic error")
          error.result = "logic error"
          raise error
        end
        def close; end
      end.new
    end
    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])

    d_csv = create_driver
    emit_csv(d_csv)
    assert_nothing_raised {
      d_csv.run
    }
  end

  def test_write_with_json_redshift_connection_error
    def PG.connect(dbinfo)
      return Class.new do
        def initialize(return_keys=[]); end
        def exec(sql, &block)
          error = PG::Error.new("redshift connection error")
          raise error
        end
        def close; end
      end.new
    end
    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])

    d_json = create_driver(CONFIG_JSON)
    emit_json(d_json)
    assert_raise(PG::Error) {
      d_json.run
    }
  end

  def test_write_with_json_no_table_on_redshift
    def PG.connect(dbinfo)
      return Class.new do
        def initialize(return_keys=[]); end
        def exec(sql, &block)
          yield [] if block_given?
        end
        def close; end
      end.new
    end
    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])

    d_json = create_driver(CONFIG_JSON)
    emit_json(d_json)
    assert_nothing_raised {
      d_json.run
    }
  end

  def test_write_with_json_failed_to_get_columns
    def PG.connect(dbinfo)
      return Class.new do
        def initialize(return_keys=[]); end
        def exec(sql, &block)
        end
        def close; end
      end.new
    end
    setup_s3_mock("")

    d_json = create_driver(CONFIG_JSON)
    emit_json(d_json)
    assert_raise(RuntimeError, "failed to fetch the redshift table definition.") {
      d_json.run
    }
  end

  def test_write_with_json_failed_to_generate_tsv
    flexmock(CSV).should_receive(:generate).with_any_args.
      and_return {
        raise "failed to generate tsv."
      }
    setup_s3_mock("")

    d_json = create_driver(CONFIG_JSON)
    emit_json(d_json)
    assert_nothing_raised {
      d_json.run
    }
  end
end
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,152 @@
--- !ruby/object:Gem::Specification
name: fluent-plugin-redshift
version: !ruby/object:Gem::Version
  version: 0.0.1
prerelease:
platform: ruby
authors:
- Masashi Miyazaki
autorequire:
bindir: bin
cert_chain: []
date: 2013-03-07 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: fluentd
  requirement: !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: 0.10.0
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: 0.10.0
- !ruby/object:Gem::Dependency
  name: aws-sdk
  requirement: !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ! '>='
      - !ruby/object:Gem::Version
        version: 1.6.3
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ! '>='
      - !ruby/object:Gem::Version
        version: 1.6.3
- !ruby/object:Gem::Dependency
  name: pg
  requirement: !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: 0.14.0
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: 0.14.0
- !ruby/object:Gem::Dependency
  name: rake
  requirement: !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ! '>='
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ! '>='
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: simplecov
  requirement: !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ! '>='
      - !ruby/object:Gem::Version
        version: 0.5.4
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ! '>='
      - !ruby/object:Gem::Version
        version: 0.5.4
- !ruby/object:Gem::Dependency
  name: flexmock
  requirement: !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ! '>='
      - !ruby/object:Gem::Version
        version: 1.3.1
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ! '>='
      - !ruby/object:Gem::Version
        version: 1.3.1
description: Amazon Redshift output plugin for Fluentd
email:
- mmasashi@gmail.com
executables: []
extensions: []
extra_rdoc_files: []
files:
- .gitignore
- Gemfile
- README.md
- Rakefile
- VERSION
- fluent-plugin-redshift.gemspec
- lib/fluent/plugin/out_redshift.rb
- test/plugin/test_out_redshift.rb
- test/test_helper.rb
homepage: https://github.com/hapyrus/fluent-plugin-redshift
licenses: []
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ! '>='
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ! '>='
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubyforge_project:
rubygems_version: 1.8.23
signing_key:
specification_version: 3
summary: Amazon Redshift output plugin for Fluentd
test_files:
- test/plugin/test_out_redshift.rb
- test/test_helper.rb