fluent-plugin-redshift 0.1.0 → 0.1.1

This diff shows the changes between these two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 208e99381f5503be7e6af6fa3ad54fc2cbeb9bea
- data.tar.gz: ed2707d372c126d420fd907bc409b5d0fede132d
+ metadata.gz: 6ff3e820b9cccac040efc73a11cf261f4a307dda
+ data.tar.gz: f00f6cc70b5cf75ac399ba65cc25f773d02123db
  SHA512:
- metadata.gz: f8f93c9b5bb3ba860a3c6900c1889453f9417a36bce39def88e82b5ca4bc7dd8a936b6cf805d246317daafd9f2cab168fa7d890a61bbad7b719b14c40321954b
- data.tar.gz: d19b4d4751c71d293493cce89cf93224115c762a32a00e363ce774ffdb40798598f656a6e7de126e0b13012011c311a52bef86804f785ec490b52ad67755876b
+ metadata.gz: 096462829885b50f9cd01852843c80fd8096d028d8e9c0f45160fc2be4f3f66e43f7e28cf4b137c79f46bbbaad8f042693ee72e9b00f793e16a2dd280d1e7b9b
+ data.tar.gz: 739976ff4077c4ce34c51ee311dcdc10dd186f9f45ab39414292f8f49b9d87d2ad2dec5f2540f3f465bf6604932e05c115d2da5250b619236df580f7840ebf29
data/README.md CHANGED
@@ -19,10 +19,14 @@ Format:
  # s3 (for copying data to redshift)
  aws_key_id YOUR_AWS_KEY_ID
  aws_sec_key YOUR_AWS_SECRET_KEY
+ ## or use an IAM Role instead of credentials.
+ aws_iam_role arn:aws:iam::ACCOUNT_ID:role/ROLE_NAME
+
  s3_bucket YOUR_S3_BUCKET
  s3_endpoint YOUR_S3_BUCKET_END_POINT
  path YOUR_S3_PATH
  timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
+ s3_server_side_encryption S3_SERVER_SIDE_ENCRYPTION

  # redshift
  redshift_host YOUR_AMAZON_REDSHIFT_CLUSTER_END_POINT
@@ -32,6 +36,7 @@ Format:
  redshift_password YOUR_AMAZON_REDSHIFT_CLUSTER_PASSWORD
  redshift_schemaname YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_SCHEMA_NAME
  redshift_tablename YOUR_AMAZON_REDSHIFT_CLUSTER_TARGET_TABLE_NAME
+ redshift_copy_columns COLUMNS_FOR_COPY
  file_type [tsv|csv|json|msgpack]

  # buffer
@@ -64,6 +69,7 @@ Example (watch and upload json formatted apache log):
  s3_endpoint s3.amazonaws.com
  path path/on/s3/apache_json_log/
  timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
+ s3_server_side_encryption aes256

  # redshift
  redshift_host xxx-yyy-zzz.xxxxxxxxxx.us-east-1.redshift.amazonaws.com
@@ -86,9 +92,11 @@ Example (watch and upload json formatted apache log):

  + `type` (required) : The value must be `redshift`.

- + `aws_key_id` (required) : AWS access key id to access s3 bucket.
+ + `aws_key_id` : AWS access key id to access s3 bucket.
+
+ + `aws_sec_key` : AWS secret key id to access s3 bucket.

- + `aws_sec_key` (required) : AWS secret key id to access s3 bucket.
+ + `aws_iam_role` : AWS IAM Role name to access s3 bucket and copy into redshift.

  + `s3_bucket` (required) : s3 bucket name. S3 bucket must be same as the region of your Redshift cluster.

@@ -105,6 +113,8 @@ Example (watch and upload json formatted apache log):
  hapyrus-example/apache_json_log/year=2013/month=03/day=05/hour=12/20130305_1230_00.gz
  </pre>

+ + `s3_server_side_encryption` : S3 Server-Side Encryption (only `aes256` is supported).
+
  + `redshift_host` (required) : the end point(or hostname) of your Amazon Redshift cluster.

  + `redshift_port` (required) : port number.
@@ -121,6 +131,8 @@ Example (watch and upload json formatted apache log):

  + `redshift_connect_timeout` : maximum time to wait for connection to succeed.

+ + `redshift_copy_columns` : columns to copy. The value must be comma-separated, like `id,name,age`.
+
  + `file_type` : file format of the source data. `csv`, `tsv`, `msgpack` or `json` are available.

  + `delimiter` : delimiter of the source data. This option will be ignored if `file_type` is specified.
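Putting the three new options together, here is a minimal sketch of a match section in the README's config format. The tag, bucket name, role ARN, cluster endpoint, table, and column names below are illustrative placeholders, not values from this diff:

    <match your.tag>
      type redshift
      # aws_iam_role replaces aws_key_id/aws_sec_key; configure rejects configs with neither.
      aws_iam_role arn:aws:iam::123456789012:role/my-redshift-copy-role
      s3_bucket my-log-bucket
      s3_endpoint s3.amazonaws.com
      path apache_json_log/
      timestamp_key_format year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M
      s3_server_side_encryption aes256
      redshift_host my-cluster.xxxxxxxxxx.us-east-1.redshift.amazonaws.com
      redshift_port 5439
      redshift_dbname my_db
      redshift_user fluent
      redshift_password my_password
      redshift_tablename apache_log
      redshift_copy_columns host,method,path,code
      file_type json
    </match>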
data/VERSION CHANGED
@@ -1 +1 @@
- 0.1.0
+ 0.1.1
@@ -22,34 +22,81 @@ class RedshiftOutput < BufferedOutput

  config_param :record_log_tag, :string, :default => 'log'
  # s3
- config_param :aws_key_id, :string, :secret => true
- config_param :aws_sec_key, :string, :secret => true
- config_param :s3_bucket, :string
- config_param :s3_endpoint, :string, :default => nil
- config_param :path, :string, :default => ""
- config_param :timestamp_key_format, :string, :default => 'year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M'
+ config_param :aws_key_id, :string, :secret => true, :default => nil,
+   :desc => "AWS access key id to access s3 bucket."
+ config_param :aws_sec_key, :string, :secret => true, :default => nil,
+   :desc => "AWS secret key id to access s3 bucket."
+ config_param :aws_iam_role, :string, :secret => true, :default => nil,
+   :desc => "AWS IAM Role to access s3 bucket."
+ config_param :s3_bucket, :string,
+   :desc => <<-DESC
+ S3 bucket name.
+ S3 bucket must be in the same region as your Redshift cluster.
+
+ DESC
+ config_param :s3_endpoint, :string, :default => nil,
+   :desc => "S3 endpoint."
+ config_param :path, :string, :default => "",
+   :desc => "S3 path to input."
+ config_param :timestamp_key_format, :string, :default => 'year=%Y/month=%m/day=%d/hour=%H/%Y%m%d-%H%M',
+   :desc => <<-DESC
+ The format of the object keys.
+ It can include date-format directives.
+ DESC
  config_param :utc, :bool, :default => false
+ config_param :s3_server_side_encryption, :string, :default => nil,
+   :desc => "S3 Server-Side Encryption (only aes256 is supported)."
  # redshift
- config_param :redshift_host, :string
- config_param :redshift_port, :integer, :default => 5439
- config_param :redshift_dbname, :string
- config_param :redshift_user, :string
- config_param :redshift_password, :string, :secret => true
- config_param :redshift_tablename, :string
- config_param :redshift_schemaname, :string, :default => nil
+ config_param :redshift_host, :string,
+   :desc => "The endpoint (or hostname) of your Amazon Redshift cluster."
+ config_param :redshift_port, :integer, :default => 5439,
+   :desc => "Port number."
+ config_param :redshift_dbname, :string,
+   :desc => "Database name."
+ config_param :redshift_user, :string,
+   :desc => "User name."
+ config_param :redshift_password, :string, :secret => true,
+   :desc => "Password for the user name."
+ config_param :redshift_tablename, :string,
+   :desc => "Table name to store data."
+ config_param :redshift_schemaname, :string, :default => nil,
+   :desc => <<-DESC
+ Schema name to store data. By default, this option is not
+ set, and the table is looked up without a schema, following your search_path.
+ DESC
  config_param :redshift_copy_base_options, :string , :default => "FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS"
  config_param :redshift_copy_options, :string , :default => nil
- config_param :redshift_connect_timeout, :integer, :default => 10
+ config_param :redshift_connect_timeout, :integer, :default => 10,
+   :desc => "Maximum time to wait for connection to succeed."
+ config_param :redshift_copy_columns, :string, :default => nil,
+   :desc => <<-DESC
+ Columns to copy.
+ The value must be comma-separated, like id,name,age.
+ DESC
  # file format
- config_param :file_type, :string, :default => nil # json, tsv, csv, msgpack
- config_param :delimiter, :string, :default => nil
+ config_param :file_type, :string, :default => nil,
+   :desc => "File format of the source data. csv, tsv, msgpack or json are available."
+ config_param :delimiter, :string, :default => nil,
+   :desc => <<-DESC
+ Delimiter of the source data.
+ This option will be ignored if file_type is specified.
+ DESC
  # maintenance
- config_param :maintenance_file_path, :string, :default => nil
+ config_param :maintenance_file_path, :string, :default => nil,
+   :desc => <<-DESC
+ Path of the maintenance file. The plugin skips processing and keeps
+ retrying while a file exists at this path.
+ To avoid data loss due to too many retries caused by long maintenance,
+ setting retry_limit and retry_wait is recommended.
+ DESC
  # for debug
  config_param :log_suffix, :string, :default => ''

  def configure(conf)
  super
+ if !check_credentials
+   raise ConfigError, "aws_key_id and aws_sec_key are required; or use aws_iam_role instead."
+ end
  @path = "#{@path}/" unless @path.end_with?('/') # append last slash
  @path = @path[1..-1] if @path.start_with?('/') # remove head slash
  @utc = true if conf['utc']
@@ -64,17 +111,26 @@ class RedshiftOutput < BufferedOutput
  @delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
  $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
  @table_name_with_schema = [@redshift_schemaname, @redshift_tablename].compact.join('.')
- @copy_sql_template = "copy #{@table_name_with_schema} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
+ @redshift_copy_columns = if !@redshift_copy_columns.to_s.empty?
+   @redshift_copy_columns.split(/[,\s]+/)
+ else
+   nil
+ end
+ @copy_sql_template = build_redshift_copy_sql_template
  @maintenance_monitor = MaintenanceMonitor.new(@maintenance_file_path)
+ @s3_server_side_encryption = @s3_server_side_encryption.to_sym if s3_server_side_encryption
  end

  def start
  super
  # init s3 conf
- options = {
-   :access_key_id => @aws_key_id,
-   :secret_access_key => @aws_sec_key
- }
+ options = {}
+ if @aws_key_id && @aws_sec_key
+   options = {
+     :access_key_id => @aws_key_id,
+     :secret_access_key => @aws_sec_key
+   }
+ end
  options[:s3_endpoint] = @s3_endpoint if @s3_endpoint
  @s3 = AWS::S3.new(options)
  @bucket = @s3.buckets[@s3_bucket]
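The configure step above turns the comma-separated redshift_copy_columns string into an array by splitting on commas and whitespace together. A standalone Ruby sketch of that parsing, using the input string exercised by the test suite later in this diff:

    # Commas, spaces, or both separate column names, so loosely formatted values still parse.
    raw = "id,name, age created_at"
    columns = raw.to_s.empty? ? nil : raw.split(/[,\s]+/)
    # => ["id", "name", "age", "created_at"]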
@@ -115,14 +171,15 @@ class RedshiftOutput < BufferedOutput

  # upload gz to s3
  @bucket.objects[s3path].write(Pathname.new(tmp.path),
-   :acl => :bucket_owner_full_control)
+   :acl => :bucket_owner_full_control,
+   :server_side_encryption => @s3_server_side_encryption)

  # close temp file
  tmp.close!

  # copy gz on s3 to redshift
  s3_uri = "s3://#{@s3_bucket}/#{s3path}"
- sql = @copy_sql_template % [s3_uri, @aws_sec_key]
+ sql = @copy_sql_template % s3_uri
  $log.debug format_log("start copying. s3_uri=#{s3_uri}")

  begin
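The write call above hands the configured encryption mode straight to the aws-sdk v1 S3 client; configure has already converted the config string to a symbol, and nil simply leaves the object unencrypted (which is what the test mocks expect). A minimal standalone sketch of the equivalent call, assuming the aws-sdk v1 gem the plugin already uses; bucket and key names are illustrative:

    require 'aws-sdk'   # v1 API, same as the plugin
    require 'pathname'

    s3 = AWS::S3.new(:access_key_id => 'AKIA...', :secret_access_key => '...')
    # :server_side_encryption => :aes256 asks S3 to encrypt the object at rest.
    s3.buckets['my-log-bucket'].objects['logs/20130305_1230_00.gz'].write(
      Pathname.new('/tmp/chunk.gz'),
      :acl => :bucket_owner_full_control,
      :server_side_encryption => :aes256)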
@@ -146,6 +203,21 @@ class RedshiftOutput < BufferedOutput

  private

+ def build_redshift_copy_sql_template
+   copy_columns = if @redshift_copy_columns
+     "(#{@redshift_copy_columns.join(",")})"
+   else
+     ''
+   end
+   credentials = if @aws_key_id && @aws_sec_key
+     "CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=#{@aws_sec_key}'"
+   else
+     "CREDENTIALS 'aws_iam_role=#{@aws_iam_role}'"
+   end
+   "copy #{@table_name_with_schema}#{copy_columns} from '%s' #{credentials} delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
+ end
+
+
  def json?
  @file_type == 'json'
@@ -175,6 +247,14 @@ class RedshiftOutput < BufferedOutput
  return nil
  end

+ if @redshift_copy_columns
+   unknown_columns = @redshift_copy_columns - redshift_table_columns
+   unless unknown_columns.empty?
+     raise Fluent::ConfigError, "missing columns included in redshift_copy_columns - missing columns:\"#{unknown_columns.join(',')}\""
+   end
+   redshift_table_columns = @redshift_copy_columns
+ end
+
  # convert json to tsv format text
  gzw = nil
  begin
@@ -186,7 +266,8 @@ class RedshiftOutput < BufferedOutput
  tsv_text = hash_to_table_text(redshift_table_columns, hash, delimiter)
  gzw.write(tsv_text) if tsv_text and not tsv_text.empty?
  rescue => e
- $log.error format_log("failed to create table text from #{@file_type}. text=(#{record[@record_log_tag]})"), :error=>e.to_s
+ text = record.is_a?(Hash) ? record[@record_log_tag] : record
+ $log.error format_log("failed to create table text from #{@file_type}. text=(#{text})"), :error=>e.to_s
  $log.error_backtrace
  end
  end
@@ -261,6 +342,16 @@ class RedshiftOutput < BufferedOutput
  s3path
  end

+ def check_credentials
+   if @aws_key_id && @aws_sec_key
+     true
+   elsif @aws_iam_role
+     true
+   else
+     false
+   end
+ end
+
  class RedshiftError < StandardError
  def initialize(msg)
  case msg
@@ -20,6 +20,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  CONFIG_BASE= %[
  aws_key_id test_key_id
  aws_sec_key test_sec_key
+ aws_iam_role test_iam_role
  s3_bucket test_bucket
  path log
  redshift_host test_host
@@ -96,6 +97,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  d = create_driver(CONFIG_CSV)
  assert_equal "test_key_id", d.instance.aws_key_id
  assert_equal "test_sec_key", d.instance.aws_sec_key
+ assert_equal "test_iam_role", d.instance.aws_iam_role
  assert_equal "test_bucket", d.instance.s3_bucket
  assert_equal "log/", d.instance.path
  assert_equal "test_host", d.instance.redshift_host
@@ -111,6 +113,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  assert_equal ",", d.instance.delimiter
  assert_equal true, d.instance.utc
  assert_equal MAINTENANCE_FILE_PATH_FOR_TEST, d.instance.maintenance_file_path
+ assert_equal nil, d.instance.redshift_copy_columns
  end
  def test_configure_with_schemaname
  d = create_driver(CONFIG_JSON_WITH_SCHEMA)
@@ -168,6 +171,15 @@ class RedshiftOutputTest < Test::Unit::TestCase
  d = create_driver(CONFIG_CSV.gsub(/ *log_suffix *.+$/, ''))
  assert_equal "", d.instance.log_suffix
  end
+ def test_configure_redshift_copy_columns
+   d = create_driver(CONFIG_CSV + "\n redshift_copy_columns id,name, age created_at")
+   assert_equal %w(id name age created_at), d.instance.redshift_copy_columns
+   assert_match /^copy test_table\(id,name,age,created_at\) from/, d.instance.instance_variable_get("@copy_sql_template")
+ end
+ def test_configure_s3_server_side_encryption
+   d = create_driver(CONFIG_CSV + "\n s3_server_side_encryption aes256")
+   assert_equal :aes256, d.instance.s3_server_side_encryption
+ end

  def emit_csv(d)
  d.emit(RECORD_CSV_A, DEFAULT_TIME)
@@ -231,9 +243,9 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end
  copy_query_regex =
  if schema_name
-   /\Acopy #{schema_name}.#{table_name} from/
+   /\Acopy #{schema_name}.#{table_name}(\(.+\))? from/
  else
-   /\Acopy #{table_name} from/
+   /\Acopy #{table_name}(\(.+\))? from/
  end

  flexmock(Fluent::RedshiftOutput::RedshiftConnection).new_instances do |conn|
@@ -274,7 +286,8 @@ class RedshiftOutputTest < Test::Unit::TestCase
  }
  assert_equal expected_data, data
  },
- :acl => :bucket_owner_full_control
+ :acl => :bucket_owner_full_control,
+ :server_side_encryption => nil
  ).and_return { true }

  # create mock of s3 object collection
@@ -476,6 +489,23 @@ class RedshiftOutputTest < Test::Unit::TestCase
  }
  end

+ def test_write_with_json_with_copy_columns
+   setup_mocks(%[val_a\tval_b\n])
+   setup_tempfile_mock_to_be_closed
+   d_json = create_driver(CONFIG_JSON + "\n redshift_copy_columns key_a,key_b")
+   emit_json(d_json)
+   assert_equal true, d_json.run
+ end
+
+ def test_write_with_json_unknown_columns_in_copy_columns
+   setup_mocks("")
+   d_json = create_driver(CONFIG_JSON + "\n redshift_copy_columns key_a,key_z")
+   emit_json(d_json)
+   assert_raise(Fluent::ConfigError, "missing columns included in redshift_copy_columns - missing columns:\"key_z\"") {
+     d_json.run
+   }
+ end
+
  def test_write_with_json_fetch_column_with_schema
  setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n],
  schema_name: 'test_schema')
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-redshift
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.1.1
  platform: ruby
  authors:
  - Masashi Miyazaki
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-08-11 00:00:00.000000000 Z
+ date: 2016-07-06 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: fluentd
@@ -135,7 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.4.6
+ rubygems_version: 2.0.14.1
  signing_key:
  specification_version: 4
  summary: Amazon Redshift output plugin for Fluentd