fluent-plugin-redshift 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +14 -2
- data/VERSION +1 -1
- data/lib/fluent/plugin/out_redshift.rb +116 -25
- data/test/plugin/test_out_redshift.rb +33 -3
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6ff3e820b9cccac040efc73a11cf261f4a307dda
|
4
|
+
data.tar.gz: f00f6cc70b5cf75ac399ba65cc25f773d02123db
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 096462829885b50f9cd01852843c80fd8096d028d8e9c0f45160fc2be4f3f66e43f7e28cf4b137c79f46bbbaad8f042693ee72e9b00f793e16a2dd280d1e7b9b
|
7
|
+
data.tar.gz: 739976ff4077c4ce34c51ee311dcdc10dd186f9f45ab39414292f8f49b9d87d2ad2dec5f2540f3f465bf6604932e05c115d2da5250b619236df580f7840ebf29
|
data/README.md
CHANGED
@@ -19,10 +19,14 @@ Format:
|
|
19
19
|
# s3 (for copying data to redshift)
|
20
20
|
aws_key_id YOUR_AWS_KEY_ID
|
21
21
|
aws_sec_key YOUR_AWS_SECRET_KEY
|
22
|
+
## or Use IAM Role instead of credentials.
|
23
|
+
aws_iam_role arn:aws:iam::ACCOUNT_ID:role/ROLE_NAME
|
24
|
+
|
22
25
|
s3_bucket YOUR_S3_BUCKET
|
23
26
|
s3_endpoint YOUR_S3_BUCKET_END_POINT
|
24
27
|
path YOUR_S3_PATH
|
25
28
|
timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
|
29
|
+
s3_server_side_encryption S3_SERVER_SIDE_ENCRYPTION
|
26
30
|
|
27
31
|
# redshift
|
28
32
|
redshift_host YOUR_AMAZON_REDSHIFT_CLUSTER_END_POINT
|
@@ -32,6 +36,7 @@ Format:
|
|
32
36
|
redshift_password YOUR_AMAZON_REDSHIFT_CLUSTER_PASSWORD
|
33
37
|
redshift_schemaname YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_SCHEMA_NAME
|
34
38
|
redshift_tablename YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_TABLE_NAME
|
39
|
+
redshift_copy_columns COLUMNS_FOR_COPY
|
35
40
|
file_type [tsv|csv|json|msgpack]
|
36
41
|
|
37
42
|
# buffer
|
@@ -64,6 +69,7 @@ Example (watch and upload json formatted apache log):
|
|
64
69
|
s3_endpoint s3.amazonaws.com
|
65
70
|
path path/on/s3/apache_json_log/
|
66
71
|
timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
|
72
|
+
s3_server_side_encryption aes256
|
67
73
|
|
68
74
|
# redshift
|
69
75
|
redshift_host xxx-yyy-zzz.xxxxxxxxxx.us-east-1.redshift.amazonaws.com
|
@@ -86,9 +92,11 @@ Example (watch and upload json formatted apache log):
|
|
86
92
|
|
87
93
|
+ `type` (required) : The value must be `redshift`.
|
88
94
|
|
89
|
-
+ `aws_key_id`
|
95
|
+
+ `aws_key_id` : AWS access key id to access s3 bucket.
|
96
|
+
|
97
|
+
+ `aws_sec_key` : AWS secret key id to access s3 bucket.
|
90
98
|
|
91
|
-
+ `
|
99
|
+
+ `aws_iam_role` : AWS IAM Role name to access s3 bucket and copy into redshift.
|
92
100
|
|
93
101
|
+ `s3_bucket` (required) : s3 bucket name. S3 bucket must be same as the region of your Redshift cluster.
|
94
102
|
|
@@ -105,6 +113,8 @@ Example (watch and upload json formatted apache log):
|
|
105
113
|
hapyrus-example/apache_json_log/year=2013/month=03/day=05/hour=12/20130305_1230_00.gz
|
106
114
|
</pre>
|
107
115
|
|
116
|
+
+ `s3_server_side_encryption` : S3 Server-Side Encryption (Only aes256 is supported)
|
117
|
+
|
108
118
|
+ `redshift_host` (required) : the end point(or hostname) of your Amazon Redshift cluster.
|
109
119
|
|
110
120
|
+ `redshift_port` (required) : port number.
|
@@ -121,6 +131,8 @@ Example (watch and upload json formatted apache log):
|
|
121
131
|
|
122
132
|
+ `redshift_connect_timeout` : maximum time to wait for connection to succeed.
|
123
133
|
|
134
|
+
+ `redshift_copy_columns` : columns for copying. Value needs to be comma-separated like `id,name,age`
|
135
|
+
|
124
136
|
+ `file_type` : file format of the source data. `csv`, `tsv`, `msgpack` or `json` are available.
|
125
137
|
|
126
138
|
+ `delimiter` : delimiter of the source data. This option will be ignored if `file_type` is specified.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.
|
1
|
+
0.1.1
|
@@ -22,34 +22,81 @@ class RedshiftOutput < BufferedOutput
|
|
22
22
|
|
23
23
|
config_param :record_log_tag, :string, :default => 'log'
|
24
24
|
# s3
|
25
|
-
config_param :aws_key_id, :string, :secret => true
|
26
|
-
|
27
|
-
config_param :
|
28
|
-
|
29
|
-
config_param :
|
30
|
-
|
25
|
+
config_param :aws_key_id, :string, :secret => true, :default => nil,
|
26
|
+
:desc => "AWS access key id to access s3 bucket."
|
27
|
+
config_param :aws_sec_key, :string, :secret => true, :default => nil,
|
28
|
+
:desc => "AWS secret key id to access s3 bucket."
|
29
|
+
config_param :aws_iam_role, :string, :secret => true, :default => nil,
|
30
|
+
:desc => "AWS IAM Role to access s3 bucket."
|
31
|
+
config_param :s3_bucket, :string,
|
32
|
+
:desc => <<-DESC
|
33
|
+
S3 bucket name.
|
34
|
+
S3 bucket must be same as the region of your Redshift cluster.
|
35
|
+
|
36
|
+
DESC
|
37
|
+
config_param :s3_endpoint, :string, :default => nil,
|
38
|
+
:desc => "S3 endpoint."
|
39
|
+
config_param :path, :string, :default => "",
|
40
|
+
:desc => "S3 path to input."
|
41
|
+
config_param :timestamp_key_format, :string, :default => 'year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M',
|
42
|
+
:desc => <<-DESC
|
43
|
+
The format of the object keys.
|
44
|
+
It can include date-format directives.
|
45
|
+
DESC
|
31
46
|
config_param :utc, :bool, :default => false
|
47
|
+
config_param :s3_server_side_encryption, :string, :default => nil,
|
48
|
+
:desc => "S3 Server-Side Encryption (Only aes256 is supported)."
|
32
49
|
# redshift
|
33
|
-
config_param :redshift_host, :string
|
34
|
-
|
35
|
-
config_param :
|
36
|
-
|
37
|
-
config_param :
|
38
|
-
|
39
|
-
config_param :
|
50
|
+
config_param :redshift_host, :string,
|
51
|
+
:desc => "The end point(or hostname) of your Amazon Redshift cluster."
|
52
|
+
config_param :redshift_port, :integer, :default => 5439,
|
53
|
+
:desc => "Port number."
|
54
|
+
config_param :redshift_dbname, :string,
|
55
|
+
:desc => "Database name."
|
56
|
+
config_param :redshift_user, :string,
|
57
|
+
:desc => "User name."
|
58
|
+
config_param :redshift_password, :string, :secret => true,
|
59
|
+
:desc => "Password for the user name."
|
60
|
+
config_param :redshift_tablename, :string,
|
61
|
+
:desc => "Table name to store data."
|
62
|
+
config_param :redshift_schemaname, :string, :default => nil,
|
63
|
+
:desc => <<-DESC
|
64
|
+
Schema name to store data. By default, this option is not
|
65
|
+
set, and the table is found without a schema according to your own search_path.
|
66
|
+
DESC
|
40
67
|
config_param :redshift_copy_base_options, :string , :default => "FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS"
|
41
68
|
config_param :redshift_copy_options, :string , :default => nil
|
42
|
-
config_param :redshift_connect_timeout, :integer, :default => 10
|
69
|
+
config_param :redshift_connect_timeout, :integer, :default => 10,
|
70
|
+
:desc => "Maximum time to wait for connection to succeed."
|
71
|
+
config_param :redshift_copy_columns, :string, :default => nil,
|
72
|
+
:desc => <<-DESC
|
73
|
+
Columns for copying.
|
74
|
+
Value needs to be comma-separated like id,name,age
|
75
|
+
DESC
|
43
76
|
# file format
|
44
|
-
config_param :file_type, :string, :default => nil
|
45
|
-
|
77
|
+
config_param :file_type, :string, :default => nil,
|
78
|
+
:desc => "File format of the source data. csv, tsv, msgpack or json are available."
|
79
|
+
config_param :delimiter, :string, :default => nil,
|
80
|
+
:desc => <<-DESC
|
81
|
+
Delimiter of the source data.
|
82
|
+
This option will be ignored if file_type is specified.
|
83
|
+
DESC
|
46
84
|
# maintenance
|
47
|
-
config_param :maintenance_file_path, :string, :default => nil
|
85
|
+
config_param :maintenance_file_path, :string, :default => nil,
|
86
|
+
:desc => <<-DESC
|
87
|
+
Path of maintenance file. plugin skip processing and keep retrying
|
88
|
+
during a file existing in this file path.
|
89
|
+
To avoid data loss due to too many retries caused by long maintenance,
|
90
|
+
setting retry_limit and retry_wait is recommended.
|
91
|
+
DESC
|
48
92
|
# for debug
|
49
93
|
config_param :log_suffix, :string, :default => ''
|
50
94
|
|
51
95
|
def configure(conf)
|
52
96
|
super
|
97
|
+
if !check_credentials
|
98
|
+
raise ConfigError, "aws_key_id and aws_sec_key is required. or, use aws_iam_role instead."
|
99
|
+
end
|
53
100
|
@path = "#{@path}/" unless @path.end_with?('/') # append last slash
|
54
101
|
@path = @path[1..-1] if @path.start_with?('/') # remove head slash
|
55
102
|
@utc = true if conf['utc']
|
@@ -64,17 +111,26 @@ class RedshiftOutput < BufferedOutput
|
|
64
111
|
@delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
|
65
112
|
$log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
|
66
113
|
@table_name_with_schema = [@redshift_schemaname, @redshift_tablename].compact.join('.')
|
67
|
-
@
|
114
|
+
@redshift_copy_columns = if !@redshift_copy_columns.to_s.empty?
|
115
|
+
@redshift_copy_columns.split(/[,\s]+/)
|
116
|
+
else
|
117
|
+
nil
|
118
|
+
end
|
119
|
+
@copy_sql_template = build_redshift_copy_sql_template
|
68
120
|
@maintenance_monitor = MaintenanceMonitor.new(@maintenance_file_path)
|
121
|
+
@s3_server_side_encryption = @s3_server_side_encryption.to_sym if s3_server_side_encryption
|
69
122
|
end
|
70
123
|
|
71
124
|
def start
|
72
125
|
super
|
73
126
|
# init s3 conf
|
74
|
-
options = {
|
75
|
-
|
76
|
-
|
77
|
-
|
127
|
+
options = {}
|
128
|
+
if @aws_key_id && @aws_sec_key
|
129
|
+
options = {
|
130
|
+
:access_key_id => @aws_key_id,
|
131
|
+
:secret_access_key => @aws_sec_key
|
132
|
+
}
|
133
|
+
end
|
78
134
|
options[:s3_endpoint] = @s3_endpoint if @s3_endpoint
|
79
135
|
@s3 = AWS::S3.new(options)
|
80
136
|
@bucket = @s3.buckets[@s3_bucket]
|
@@ -115,14 +171,15 @@ class RedshiftOutput < BufferedOutput
|
|
115
171
|
|
116
172
|
# upload gz to s3
|
117
173
|
@bucket.objects[s3path].write(Pathname.new(tmp.path),
|
118
|
-
:acl => :bucket_owner_full_control
|
174
|
+
:acl => :bucket_owner_full_control,
|
175
|
+
:server_side_encryption => @s3_server_side_encryption)
|
119
176
|
|
120
177
|
# close temp file
|
121
178
|
tmp.close!
|
122
179
|
|
123
180
|
# copy gz on s3 to redshift
|
124
181
|
s3_uri = "s3://#{@s3_bucket}/#{s3path}"
|
125
|
-
sql = @copy_sql_template %
|
182
|
+
sql = @copy_sql_template % s3_uri
|
126
183
|
$log.debug format_log("start copying. s3_uri=#{s3_uri}")
|
127
184
|
|
128
185
|
begin
|
@@ -146,6 +203,21 @@ class RedshiftOutput < BufferedOutput
|
|
146
203
|
|
147
204
|
private
|
148
205
|
|
206
|
+
def build_redshift_copy_sql_template
|
207
|
+
copy_columns = if @redshift_copy_columns
|
208
|
+
"(#{@redshift_copy_columns.join(",")})"
|
209
|
+
else
|
210
|
+
''
|
211
|
+
end
|
212
|
+
credentials = if @aws_key_id && @aws_sec_key
|
213
|
+
"CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=#{@aws_sec_key}'"
|
214
|
+
else
|
215
|
+
"CREDENTIALS 'aws_iam_role=#{@aws_iam_role}'"
|
216
|
+
end
|
217
|
+
"copy #{@table_name_with_schema}#{copy_columns} from '%s' #{credentials} delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
|
218
|
+
end
|
219
|
+
|
220
|
+
|
149
221
|
def json?
|
150
222
|
@file_type == 'json'
|
151
223
|
end
|
@@ -175,6 +247,14 @@ class RedshiftOutput < BufferedOutput
|
|
175
247
|
return nil
|
176
248
|
end
|
177
249
|
|
250
|
+
if @redshift_copy_columns
|
251
|
+
unknown_colmns = @redshift_copy_columns - redshift_table_columns
|
252
|
+
unless unknown_colmns.empty?
|
253
|
+
raise Fluent::ConfigError, "missing columns included in redshift_copy_columns - missing columns:\"#{unknown_colmns.join(',')}\""
|
254
|
+
end
|
255
|
+
redshift_table_columns = @redshift_copy_columns
|
256
|
+
end
|
257
|
+
|
178
258
|
# convert json to tsv format text
|
179
259
|
gzw = nil
|
180
260
|
begin
|
@@ -186,7 +266,8 @@ class RedshiftOutput < BufferedOutput
|
|
186
266
|
tsv_text = hash_to_table_text(redshift_table_columns, hash, delimiter)
|
187
267
|
gzw.write(tsv_text) if tsv_text and not tsv_text.empty?
|
188
268
|
rescue => e
|
189
|
-
|
269
|
+
text = record.is_a?(Hash) ? record[@record_log_tag] : record
|
270
|
+
$log.error format_log("failed to create table text from #{@file_type}. text=(#{text})"), :error=>e.to_s
|
190
271
|
$log.error_backtrace
|
191
272
|
end
|
192
273
|
end
|
@@ -261,6 +342,16 @@ class RedshiftOutput < BufferedOutput
|
|
261
342
|
s3path
|
262
343
|
end
|
263
344
|
|
345
|
+
def check_credentials
|
346
|
+
if @aws_key_id && @aws_sec_key
|
347
|
+
true
|
348
|
+
elsif @aws_iam_role
|
349
|
+
true
|
350
|
+
else
|
351
|
+
false
|
352
|
+
end
|
353
|
+
end
|
354
|
+
|
264
355
|
class RedshiftError < StandardError
|
265
356
|
def initialize(msg)
|
266
357
|
case msg
|
@@ -20,6 +20,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
20
20
|
CONFIG_BASE= %[
|
21
21
|
aws_key_id test_key_id
|
22
22
|
aws_sec_key test_sec_key
|
23
|
+
aws_iam_role test_iam_role
|
23
24
|
s3_bucket test_bucket
|
24
25
|
path log
|
25
26
|
redshift_host test_host
|
@@ -96,6 +97,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
96
97
|
d = create_driver(CONFIG_CSV)
|
97
98
|
assert_equal "test_key_id", d.instance.aws_key_id
|
98
99
|
assert_equal "test_sec_key", d.instance.aws_sec_key
|
100
|
+
assert_equal "test_iam_role", d.instance.aws_iam_role
|
99
101
|
assert_equal "test_bucket", d.instance.s3_bucket
|
100
102
|
assert_equal "log/", d.instance.path
|
101
103
|
assert_equal "test_host", d.instance.redshift_host
|
@@ -111,6 +113,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
111
113
|
assert_equal ",", d.instance.delimiter
|
112
114
|
assert_equal true, d.instance.utc
|
113
115
|
assert_equal MAINTENANCE_FILE_PATH_FOR_TEST, d.instance.maintenance_file_path
|
116
|
+
assert_equal nil, d.instance.redshift_copy_columns
|
114
117
|
end
|
115
118
|
def test_configure_with_schemaname
|
116
119
|
d = create_driver(CONFIG_JSON_WITH_SCHEMA)
|
@@ -168,6 +171,15 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
168
171
|
d = create_driver(CONFIG_CSV.gsub(/ *log_suffix *.+$/, ''))
|
169
172
|
assert_equal "", d.instance.log_suffix
|
170
173
|
end
|
174
|
+
def test_configure_redshift_copy_columns
|
175
|
+
d = create_driver(CONFIG_CSV + "\n redshift_copy_columns id,name, age created_at")
|
176
|
+
assert_equal %w(id name age created_at), d.instance.redshift_copy_columns
|
177
|
+
assert_match /^copy test_table\(id,name,age,created_at\) from/, d.instance.instance_variable_get("@copy_sql_template")
|
178
|
+
end
|
179
|
+
def test_configure_s3_server_side_encryption
|
180
|
+
d = create_driver(CONFIG_CSV + "\n s3_server_side_encryption aes256")
|
181
|
+
assert_equal :aes256, d.instance.s3_server_side_encryption
|
182
|
+
end
|
171
183
|
|
172
184
|
def emit_csv(d)
|
173
185
|
d.emit(RECORD_CSV_A, DEFAULT_TIME)
|
@@ -231,9 +243,9 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
231
243
|
end
|
232
244
|
copy_query_regex =
|
233
245
|
if schema_name
|
234
|
-
/\Acopy #{schema_name}.#{table_name} from/
|
246
|
+
/\Acopy #{schema_name}.#{table_name}(\(.+\))? from/
|
235
247
|
else
|
236
|
-
/\Acopy #{table_name} from/
|
248
|
+
/\Acopy #{table_name}(\(.+\))? from/
|
237
249
|
end
|
238
250
|
|
239
251
|
flexmock(Fluent::RedshiftOutput::RedshiftConnection).new_instances do |conn|
|
@@ -274,7 +286,8 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
274
286
|
}
|
275
287
|
assert_equal expected_data, data
|
276
288
|
},
|
277
|
-
:acl => :bucket_owner_full_control
|
289
|
+
:acl => :bucket_owner_full_control,
|
290
|
+
:server_side_encryption => nil
|
278
291
|
).and_return { true }
|
279
292
|
|
280
293
|
# create mock of s3 object collection
|
@@ -476,6 +489,23 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
476
489
|
}
|
477
490
|
end
|
478
491
|
|
492
|
+
def test_write_with_json_with_copy_columns
|
493
|
+
setup_mocks(%[val_a\tval_b\n])
|
494
|
+
setup_tempfile_mock_to_be_closed
|
495
|
+
d_json = create_driver(CONFIG_JSON + "\n redshift_copy_columns key_a,key_b")
|
496
|
+
emit_json(d_json)
|
497
|
+
assert_equal true, d_json.run
|
498
|
+
end
|
499
|
+
|
500
|
+
def test_write_with_json_uknown_columns_in_copy_columns
|
501
|
+
setup_mocks("")
|
502
|
+
d_json = create_driver(CONFIG_JSON + "\n redshift_copy_columns key_a,key_z")
|
503
|
+
emit_json(d_json)
|
504
|
+
assert_raise(Fluent::ConfigError, "missing columns included in redshift_copy_columns - missing columns:\"key_z\"") {
|
505
|
+
d_json.run
|
506
|
+
}
|
507
|
+
end
|
508
|
+
|
479
509
|
def test_write_with_json_fetch_column_with_schema
|
480
510
|
setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n],
|
481
511
|
schema_name: 'test_schema')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-redshift
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Masashi Miyazaki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-07-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fluentd
|
@@ -135,7 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
135
135
|
version: '0'
|
136
136
|
requirements: []
|
137
137
|
rubyforge_project:
|
138
|
-
rubygems_version: 2.
|
138
|
+
rubygems_version: 2.0.14.1
|
139
139
|
signing_key:
|
140
140
|
specification_version: 4
|
141
141
|
summary: Amazon Redshift output plugin for Fluentd
|