fluent-plugin-redshift 0.0.6 → 0.1.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: ae21e38c3e70d5c36c0c52bcf3e38183756e4534
- data.tar.gz: a2ebf5bc56f51b9c5f4dbd55fcf07499d9bc9bd8
+ metadata.gz: 208e99381f5503be7e6af6fa3ad54fc2cbeb9bea
+ data.tar.gz: ed2707d372c126d420fd907bc409b5d0fede132d
  SHA512:
- metadata.gz: 064a44a6ed9086b1aef44eded321e984a9ccb8afc530e46b03ad99a77b2a2b384c24900ceea220f92c96871c1db18c9503910aff50ccc2b36d042e3d6077d80d
- data.tar.gz: c85eb3832be2f660f8974b535e95430a51c2a13556a97d3f417eec816a05eb78c015ba079080847a46f48b3f6350cbddd3a06bae12464bcdcba4a26776e28390
+ metadata.gz: f8f93c9b5bb3ba860a3c6900c1889453f9417a36bce39def88e82b5ca4bc7dd8a936b6cf805d246317daafd9f2cab168fa7d890a61bbad7b719b14c40321954b
+ data.tar.gz: d19b4d4751c71d293493cce89cf93224115c762a32a00e363ce774ffdb40798598f656a6e7de126e0b13012011c311a52bef86804f785ec490b52ad67755876b
data/README.md CHANGED
@@ -39,6 +39,9 @@ Format:
  buffer_path /var/log/fluent/redshift
  flush_interval 15m
  buffer_chunk_limit 1g
+
+ # maintenance
+ maintenance_file_path /path/to/maintenance_file
  </match>

  Example (watch and upload json formatted apache log):
@@ -76,6 +79,9 @@ Example (watch and upload json formatted apache log):
  buffer_path /var/log/fluent/redshift
  flush_interval 15m
  buffer_chunk_limit 1g
+
+ # maintenance
+ maintenance_file_path /tmp/fluentd_redshift_maintenance
  </match>

  + `type` (required) : The value must be `redshift`.
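As a hedged illustration of operating the new `maintenance_file_path` switch configured above (the path and the commands are examples, not part of the plugin):

```ruby
require "fileutils"

# Illustrative path; it must match maintenance_file_path in the <match> section.
maintenance_file = "/tmp/fluentd_redshift_maintenance"

FileUtils.touch(maintenance_file)   # enter maintenance: write() raises and Fluentd keeps retrying buffered chunks
File.delete(maintenance_file)       # leave maintenance: the next retry flushes to Redshift as usual
```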
@@ -129,6 +135,8 @@ Example (watch and upload json formatted apache log):

  + `utc` : utc time zone. This parameter affects `timestamp_key_format`.

+ + `maintenance_file_path` : path of the maintenance file. While a file exists at this path, the plugin skips processing and keeps retrying. To avoid data loss from too many retries during a long maintenance window, setting `retry_limit` and `retry_wait` is recommended.
+
  ## Logging examples
  ```ruby
  # examples by fluent-logger
data/VERSION CHANGED
@@ -1 +1 @@
- 0.0.6
+ 0.1.0
@@ -16,7 +16,7 @@ Gem::Specification.new do |gem|
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

- gem.add_dependency "fluentd", "~> 0.10.0"
+ gem.add_dependency "fluentd", [">= 0.10.0", "< 2"]
  gem.add_dependency "aws-sdk-v1", ">= 1.6.3"
  gem.add_dependency "pg", "~> 0.17.0"
  gem.add_development_dependency "rake"
@@ -4,6 +4,8 @@ module Fluent
  class RedshiftOutput < BufferedOutput
  Fluent::Plugin.register_output('redshift', self)

+ NULL_CHAR_FOR_COPY = "\\N"
+
  # ignore load table error. (invalid data format)
  IGNORE_REDSHIFT_ERROR_REGEXP = /^ERROR: Load into table '[^']+' failed\./

@@ -20,8 +22,8 @@ class RedshiftOutput < BufferedOutput

  config_param :record_log_tag, :string, :default => 'log'
  # s3
- config_param :aws_key_id, :string
- config_param :aws_sec_key, :string
+ config_param :aws_key_id, :string, :secret => true
+ config_param :aws_sec_key, :string, :secret => true
  config_param :s3_bucket, :string
  config_param :s3_endpoint, :string, :default => nil
  config_param :path, :string, :default => ""
@@ -32,7 +34,7 @@ class RedshiftOutput < BufferedOutput
  config_param :redshift_port, :integer, :default => 5439
  config_param :redshift_dbname, :string
  config_param :redshift_user, :string
- config_param :redshift_password, :string
+ config_param :redshift_password, :string, :secret => true
  config_param :redshift_tablename, :string
  config_param :redshift_schemaname, :string, :default => nil
  config_param :redshift_copy_base_options, :string , :default => "FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS"
@@ -41,6 +43,8 @@ class RedshiftOutput < BufferedOutput
  # file format
  config_param :file_type, :string, :default => nil # json, tsv, csv, msgpack
  config_param :delimiter, :string, :default => nil
+ # maintenance
+ config_param :maintenance_file_path, :string, :default => nil
  # for debug
  config_param :log_suffix, :string, :default => ''

@@ -61,6 +65,7 @@ class RedshiftOutput < BufferedOutput
  $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
  @table_name_with_schema = [@redshift_schemaname, @redshift_tablename].compact.join('.')
  @copy_sql_template = "copy #{@table_name_with_schema} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
+ @maintenance_monitor = MaintenanceMonitor.new(@maintenance_file_path)
  end

  def start
@@ -88,6 +93,7 @@ class RedshiftOutput < BufferedOutput

  def write(chunk)
  $log.debug format_log("start creating gz.")
+ @maintenance_monitor.check_maintenance!

  # create a gz file
  tmp = Tempfile.new("s3-")
@@ -180,12 +186,7 @@ class RedshiftOutput < BufferedOutput
  tsv_text = hash_to_table_text(redshift_table_columns, hash, delimiter)
  gzw.write(tsv_text) if tsv_text and not tsv_text.empty?
  rescue => e
- if json?
- $log.error format_log("failed to create table text from json. text=(#{record[@record_log_tag]})"), :error=>e.to_s
- else
- $log.error format_log("failed to create table text from msgpack. text=(#{record[@record_log_tag]})"), :error=>e.to_s
- end
-
+ $log.error format_log("failed to create table text from #{@file_type}. text=(#{record[@record_log_tag]})"), :error=>e.to_s
  $log.error_backtrace
  end
  end
@@ -220,18 +221,9 @@ class RedshiftOutput < BufferedOutput
  return "" unless hash

  # extract values from hash
- val_list = redshift_table_columns.collect do |cn|
- val = hash[cn]
- val = JSON.generate(val) if val.kind_of?(Hash) or val.kind_of?(Array)
-
- if val.to_s.empty?
- nil
- else
- val.to_s
- end
- end
+ val_list = redshift_table_columns.collect {|cn| hash[cn]}

- if val_list.all?{|v| v.nil? or v.empty?}
+ if val_list.all?{|v| v.nil?}
  $log.warn format_log("no data match for table columns on redshift. data=#{hash} table_columns=#{redshift_table_columns}")
  return ""
  end
@@ -240,14 +232,22 @@ class RedshiftOutput < BufferedOutput
  end

  def generate_line_with_delimiter(val_list, delimiter)
- val_list = val_list.collect do |val|
- if val.nil? or val.empty?
- ""
+ val_list.collect do |val|
+ case val
+ when nil
+ NULL_CHAR_FOR_COPY
+ when ''
+ ''
+ when Hash, Array
+ escape_text_for_copy(JSON.generate(val))
  else
- val.gsub(/\\/, "\\\\\\").gsub(/\t/, "\\\t").gsub(/\n/, "\\\n") # escape tab, newline and backslash
+ escape_text_for_copy(val.to_s)
  end
- end
- val_list.join(delimiter) + "\n"
+ end.join(delimiter) + "\n"
+ end
+
+ def escape_text_for_copy(val)
+ val.gsub(/\\|\t|\n/, {"\\" => "\\\\", "\t" => "\\\t", "\n" => "\\\n"}) # escape tab, newline and backslash
  end

  def create_s3path(bucket, path)
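To make the rewritten COPY-line generation above concrete, here is a hedged standalone sketch that mirrors the new methods (simplified, outside the plugin class) and shows the line produced for one sample row:

```ruby
require "json"

NULL_CHAR_FOR_COPY = "\\N"

# Mirrors escape_text_for_copy above: escape backslash, tab and newline for COPY ... ESCAPE.
def escape_text_for_copy(val)
  val.gsub(/\\|\t|\n/, {"\\" => "\\\\", "\t" => "\\\t", "\n" => "\\\n"})
end

# Mirrors generate_line_with_delimiter above.
def generate_line_with_delimiter(val_list, delimiter)
  val_list.collect do |val|
    case val
    when nil         then NULL_CHAR_FOR_COPY                       # missing column -> \N, loaded as SQL NULL
    when ''          then ''                                       # empty string stays an empty (non-NULL) field
    when Hash, Array then escape_text_for_copy(JSON.generate(val)) # nested values are serialized as JSON
    else                  escape_text_for_copy(val.to_s)
    end
  end.join(delimiter) + "\n"
end

puts generate_line_with_delimiter(["a\tb", nil, "", {"k" => "v"}], "\t").inspect
# => "a\\\tb\t\\N\t\t{\"k\":\"v\"}\n"
```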
@@ -361,8 +361,26 @@ class RedshiftOutput < BufferedOutput
  sql
  end
  end
- end

+ class MaintenanceError < StandardError
+ end
+
+ class MaintenanceMonitor
+ def initialize(maintenance_file_path)
+ @file_path = maintenance_file_path
+ end
+
+ def in_maintenance?
+ !!(@file_path && File.exists?(@file_path))
+ end
+
+ def check_maintenance!
+ if in_maintenance?
+ raise MaintenanceError.new("Service is in maintenance mode - maintenance_file_path:#{@file_path}")
+ end
+ end
+ end
+ end


  end
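A hedged usage sketch of the maintenance gate added above: while the configured file exists, `check_maintenance!` raises, `write` never uploads the chunk, and Fluentd's BufferedOutput keeps the chunk queued and retries it under `retry_wait`/`retry_limit` (this snippet assumes the plugin file is already loaded, e.g. inside a Fluentd process or its test suite):

```ruby
monitor = Fluent::RedshiftOutput::MaintenanceMonitor.new("/tmp/fluentd_redshift_maintenance")

begin
  monitor.check_maintenance!   # raises MaintenanceError while the file exists
  puts "not in maintenance - the chunk would be gzipped and copied into Redshift"
rescue Fluent::RedshiftOutput::MaintenanceError => e
  puts "skipped: #{e.message}" # the buffered chunk is kept and retried later
end
```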
@@ -15,6 +15,8 @@ class RedshiftOutputTest < Test::Unit::TestCase
  PG::Error.module_eval { attr_accessor :result}
  end

+ MAINTENANCE_FILE_PATH_FOR_TEST = "/tmp/fluentd_redshift_plugin_test_maintenance"
+
  CONFIG_BASE= %[
  aws_key_id test_key_id
  aws_sec_key test_sec_key
@@ -28,6 +30,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  buffer_type memory
  utc
  log_suffix id:5 host:localhost
+ maintenance_file_path #{MAINTENANCE_FILE_PATH_FOR_TEST}
  ]
  CONFIG_CSV= %[
  #{CONFIG_BASE}
@@ -107,6 +110,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  assert_equal "csv", d.instance.file_type
  assert_equal ",", d.instance.delimiter
  assert_equal true, d.instance.utc
+ assert_equal MAINTENANCE_FILE_PATH_FOR_TEST, d.instance.maintenance_file_path
  end
  def test_configure_with_schemaname
  d = create_driver(CONFIG_JSON_WITH_SCHEMA)
@@ -311,7 +315,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_json
- setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+ setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
  setup_tempfile_mock_to_be_closed
  d_json = create_driver(CONFIG_JSON)
  emit_json(d_json)
@@ -319,7 +323,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_json_hash_value
- setup_mocks("val_a\t{\"foo\":\"var\"}\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+ setup_mocks("val_a\t{\"foo\":\"var\"}\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
  d_json = create_driver(CONFIG_JSON)
  d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : {"foo" : "var"}}]} , DEFAULT_TIME)
  d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
@@ -327,7 +331,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_json_array_value
- setup_mocks("val_a\t[\"foo\",\"var\"]\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+ setup_mocks("val_a\t[\"foo\",\"var\"]\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
  d_json = create_driver(CONFIG_JSON)
  d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ["foo", "var"]}]} , DEFAULT_TIME)
  d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
@@ -335,13 +339,20 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_json_including_tab_newline_quote
- setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+ setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
  d_json = create_driver(CONFIG_JSON)
  d_json.emit({"log" => %[{"key_a" : "val_a_with_\\t_tab_\\n_newline", "key_b" : "val_b_with_\\\\_quote"}]} , DEFAULT_TIME)
  d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
  assert_equal true, d_json.run
  end

+ def test_write_with_json_empty_text_value
+ setup_mocks(%[val_a\t\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
+ d_json = create_driver(CONFIG_JSON)
+ d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ""}]} , DEFAULT_TIME)
+ assert_equal true, d_json.run
+ end
+
  def test_write_with_json_no_data
  setup_mocks("")
  d_json = create_driver(CONFIG_JSON)
@@ -351,7 +362,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_json_invalid_one_line
- setup_mocks(%[\t\tval_c\tval_d\t\t\t\t\n])
+ setup_mocks(%[\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
  d_json = create_driver(CONFIG_JSON)
  d_json.emit({"log" => %[}}]}, DEFAULT_TIME)
  d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
@@ -359,7 +370,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_json_no_available_data
- setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n])
+ setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
  d_json = create_driver(CONFIG_JSON)
  d_json.emit(RECORD_JSON_A, DEFAULT_TIME)
  d_json.emit({"log" => %[{"key_o" : "val_o", "key_p" : "val_p"}]}, DEFAULT_TIME)
@@ -367,14 +378,14 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_msgpack
- setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+ setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
  d_msgpack = create_driver(CONFIG_MSGPACK)
  emit_msgpack(d_msgpack)
  assert_equal true, d_msgpack.run
  end

  def test_write_with_msgpack_hash_value
- setup_mocks("val_a\t{\"foo\":\"var\"}\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+ setup_mocks("val_a\t{\"foo\":\"var\"}\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
  d_msgpack = create_driver(CONFIG_MSGPACK)
  d_msgpack.emit({"key_a" => "val_a", "key_b" => {"foo" => "var"}} , DEFAULT_TIME)
  d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
@@ -382,7 +393,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_msgpack_array_value
- setup_mocks("val_a\t[\"foo\",\"var\"]\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+ setup_mocks("val_a\t[\"foo\",\"var\"]\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
  d_msgpack = create_driver(CONFIG_MSGPACK)
  d_msgpack.emit({"key_a" => "val_a", "key_b" => ["foo", "var"]} , DEFAULT_TIME)
  d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
@@ -390,7 +401,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_msgpack_including_tab_newline_quote
- setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+ setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
  d_msgpack = create_driver(CONFIG_MSGPACK)
  d_msgpack.emit({"key_a" => "val_a_with_\t_tab_\n_newline", "key_b" => "val_b_with_\\_quote"} , DEFAULT_TIME)
  d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
@@ -406,7 +417,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_msgpack_no_available_data
- setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n])
+ setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
  d_msgpack = create_driver(CONFIG_MSGPACK)
  d_msgpack.emit(RECORD_MSGPACK_A, DEFAULT_TIME)
  d_msgpack.emit({"key_o" => "val_o", "key_p" => "val_p"}, DEFAULT_TIME)
@@ -466,10 +477,21 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_json_fetch_column_with_schema
- setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n],
+ setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n],
  schema_name: 'test_schema')
  d_json = create_driver(CONFIG_JSON_WITH_SCHEMA)
  emit_json(d_json)
  assert_equal true, d_json.run
  end
+
+ def test_maintenance_mode
+ flexmock(File).should_receive(:exists?).with(MAINTENANCE_FILE_PATH_FOR_TEST).and_return(true)
+
+ d_json = create_driver(CONFIG_JSON)
+ emit_json(d_json)
+ assert_raise(Fluent::RedshiftOutput::MaintenanceError,
+ "Service is in maintenance mode - maintenance_file_path:#{MAINTENANCE_FILE_PATH_FOR_TEST}") {
+ d_json.run
+ }
+ end
  end
metadata CHANGED
@@ -1,29 +1,35 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-redshift
  version: !ruby/object:Gem::Version
- version: 0.0.6
+ version: 0.1.0
  platform: ruby
  authors:
  - Masashi Miyazaki
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-08-05 00:00:00.000000000 Z
+ date: 2015-08-11 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: fluentd
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ~>
+ - - '>='
  - !ruby/object:Gem::Version
  version: 0.10.0
+ - - <
+ - !ruby/object:Gem::Version
+ version: '2'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ~>
+ - - '>='
  - !ruby/object:Gem::Version
  version: 0.10.0
+ - - <
+ - !ruby/object:Gem::Version
+ version: '2'
  - !ruby/object:Gem::Dependency
  name: aws-sdk-v1
  requirement: !ruby/object:Gem::Requirement