fluent-plugin-redshift 0.0.6 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: ae21e38c3e70d5c36c0c52bcf3e38183756e4534
- data.tar.gz: a2ebf5bc56f51b9c5f4dbd55fcf07499d9bc9bd8
+ metadata.gz: 208e99381f5503be7e6af6fa3ad54fc2cbeb9bea
+ data.tar.gz: ed2707d372c126d420fd907bc409b5d0fede132d
  SHA512:
- metadata.gz: 064a44a6ed9086b1aef44eded321e984a9ccb8afc530e46b03ad99a77b2a2b384c24900ceea220f92c96871c1db18c9503910aff50ccc2b36d042e3d6077d80d
- data.tar.gz: c85eb3832be2f660f8974b535e95430a51c2a13556a97d3f417eec816a05eb78c015ba079080847a46f48b3f6350cbddd3a06bae12464bcdcba4a26776e28390
+ metadata.gz: f8f93c9b5bb3ba860a3c6900c1889453f9417a36bce39def88e82b5ca4bc7dd8a936b6cf805d246317daafd9f2cab168fa7d890a61bbad7b719b14c40321954b
+ data.tar.gz: d19b4d4751c71d293493cce89cf93224115c762a32a00e363ce774ffdb40798598f656a6e7de126e0b13012011c311a52bef86804f785ec490b52ad67755876b
data/README.md CHANGED
@@ -39,6 +39,9 @@ Format:
  buffer_path /var/log/fluent/redshift
  flush_interval 15m
  buffer_chunk_limit 1g
+
+ # maintenance
+ maintenance_file_path /path/to/maintenance_file
  </match>

  Example (watch and upload json formatted apache log):
@@ -76,6 +79,9 @@ Example (watch and upload json formatted apache log):
  buffer_path /var/log/fluent/redshift
  flush_interval 15m
  buffer_chunk_limit 1g
+
+ # maintenance
+ maintenance_file_path /tmp/fluentd_redshift_maintenance
  </match>

  + `type` (required) : The value must be `redshift`.
@@ -129,6 +135,8 @@ Example (watch and upload json formatted apache log):

  + `utc` : utc time zone. This parameter affects `timestamp_key_format`.

+ + `maintenance_file_path` : path of the maintenance file. While a file exists at this path, the plugin skips processing and keeps retrying. To avoid data loss from exhausting retries during a long maintenance window, setting `retry_limit` and `retry_wait` is also recommended.
+
  ## Logging examples
  ```ruby
  # examples by fluent-logger
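The `maintenance_file_path` parameter documented in the hunk above is meant to be combined with fluentd's standard `retry_limit` and `retry_wait` buffer settings. A minimal `<match>` sketch in the style of the README examples (the tag and the retry values here are illustrative assumptions, not plugin defaults):

  <match my.redshift.tag>
    type redshift
    # aws/s3/redshift connection settings as in the examples above
    buffer_type file
    buffer_path /var/log/fluent/redshift
    flush_interval 15m
    buffer_chunk_limit 1g
    retry_wait 1m        # assumed value: time between retries while in maintenance
    retry_limit 17       # assumed value: raise it for long maintenance windows
    maintenance_file_path /tmp/fluentd_redshift_maintenance
  </match>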
data/VERSION CHANGED
@@ -1 +1 @@
- 0.0.6
+ 0.1.0
@@ -16,7 +16,7 @@ Gem::Specification.new do |gem|
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

- gem.add_dependency "fluentd", "~> 0.10.0"
+ gem.add_dependency "fluentd", [">= 0.10.0", "< 2"]
  gem.add_dependency "aws-sdk-v1", ">= 1.6.3"
  gem.add_dependency "pg", "~> 0.17.0"
  gem.add_development_dependency "rake"
@@ -4,6 +4,8 @@ module Fluent
  class RedshiftOutput < BufferedOutput
  Fluent::Plugin.register_output('redshift', self)

+ NULL_CHAR_FOR_COPY = "\\N"
+
  # ignore load table error. (invalid data format)
  IGNORE_REDSHIFT_ERROR_REGEXP = /^ERROR: Load into table '[^']+' failed\./

@@ -20,8 +22,8 @@ class RedshiftOutput < BufferedOutput

  config_param :record_log_tag, :string, :default => 'log'
  # s3
- config_param :aws_key_id, :string
- config_param :aws_sec_key, :string
+ config_param :aws_key_id, :string, :secret => true
+ config_param :aws_sec_key, :string, :secret => true
  config_param :s3_bucket, :string
  config_param :s3_endpoint, :string, :default => nil
  config_param :path, :string, :default => ""
@@ -32,7 +34,7 @@ class RedshiftOutput < BufferedOutput
  config_param :redshift_port, :integer, :default => 5439
  config_param :redshift_dbname, :string
  config_param :redshift_user, :string
- config_param :redshift_password, :string
+ config_param :redshift_password, :string, :secret => true
  config_param :redshift_tablename, :string
  config_param :redshift_schemaname, :string, :default => nil
  config_param :redshift_copy_base_options, :string , :default => "FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS"
@@ -41,6 +43,8 @@ class RedshiftOutput < BufferedOutput
  # file format
  config_param :file_type, :string, :default => nil # json, tsv, csv, msgpack
  config_param :delimiter, :string, :default => nil
+ # maintenance
+ config_param :maintenance_file_path, :string, :default => nil
  # for debug
  config_param :log_suffix, :string, :default => ''

@@ -61,6 +65,7 @@ class RedshiftOutput < BufferedOutput
  $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
  @table_name_with_schema = [@redshift_schemaname, @redshift_tablename].compact.join('.')
  @copy_sql_template = "copy #{@table_name_with_schema} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
+ @maintenance_monitor = MaintenanceMonitor.new(@maintenance_file_path)
  end

  def start
@@ -88,6 +93,7 @@ class RedshiftOutput < BufferedOutput

  def write(chunk)
  $log.debug format_log("start creating gz.")
+ @maintenance_monitor.check_maintenance!

  # create a gz file
  tmp = Tempfile.new("s3-")
@@ -180,12 +186,7 @@ class RedshiftOutput < BufferedOutput
  tsv_text = hash_to_table_text(redshift_table_columns, hash, delimiter)
  gzw.write(tsv_text) if tsv_text and not tsv_text.empty?
  rescue => e
- if json?
- $log.error format_log("failed to create table text from json. text=(#{record[@record_log_tag]})"), :error=>e.to_s
- else
- $log.error format_log("failed to create table text from msgpack. text=(#{record[@record_log_tag]})"), :error=>e.to_s
- end
-
+ $log.error format_log("failed to create table text from #{@file_type}. text=(#{record[@record_log_tag]})"), :error=>e.to_s
  $log.error_backtrace
  end
  end
@@ -220,18 +221,9 @@ class RedshiftOutput < BufferedOutput
  return "" unless hash

  # extract values from hash
- val_list = redshift_table_columns.collect do |cn|
- val = hash[cn]
- val = JSON.generate(val) if val.kind_of?(Hash) or val.kind_of?(Array)
-
- if val.to_s.empty?
- nil
- else
- val.to_s
- end
- end
+ val_list = redshift_table_columns.collect {|cn| hash[cn]}

- if val_list.all?{|v| v.nil? or v.empty?}
+ if val_list.all?{|v| v.nil?}
  $log.warn format_log("no data match for table columns on redshift. data=#{hash} table_columns=#{redshift_table_columns}")
  return ""
  end
@@ -240,14 +232,22 @@ class RedshiftOutput < BufferedOutput
  end

  def generate_line_with_delimiter(val_list, delimiter)
- val_list = val_list.collect do |val|
- if val.nil? or val.empty?
- ""
+ val_list.collect do |val|
+ case val
+ when nil
+ NULL_CHAR_FOR_COPY
+ when ''
+ ''
+ when Hash, Array
+ escape_text_for_copy(JSON.generate(val))
  else
- val.gsub(/\\/, "\\\\\\").gsub(/\t/, "\\\t").gsub(/\n/, "\\\n") # escape tab, newline and backslash
+ escape_text_for_copy(val.to_s)
  end
- end
- val_list.join(delimiter) + "\n"
+ end.join(delimiter) + "\n"
+ end
+
+ def escape_text_for_copy(val)
+ val.gsub(/\\|\t|\n/, {"\\" => "\\\\", "\t" => "\\\t", "\n" => "\\\n"}) # escape tab, newline and backslash
  end

  def create_s3path(bucket, path)
@@ -361,8 +361,26 @@ class RedshiftOutput < BufferedOutput
  sql
  end
  end
- end

+ class MaintenanceError < StandardError
+ end
+
+ class MaintenanceMonitor
+ def initialize(maintenance_file_path)
+ @file_path = maintenance_file_path
+ end
+
+ def in_maintenance?
+ !!(@file_path && File.exists?(@file_path))
+ end
+
+ def check_maintenance!
+ if in_maintenance?
+ raise MaintenanceError.new("Service is in maintenance mode - maintenance_file_path:#{@file_path}")
+ end
+ end
+ end
+ end


  end
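Taken together, the out_redshift changes above replace the old "empty string for anything falsy" behaviour with an explicit `\N` NULL marker for missing columns, plus a shared escaping helper. A condensed, standalone Ruby sketch of that formatting path (mirroring the added code, trimmed of the plugin plumbing):

```ruby
require 'json'

# COPY's NULL marker: a literal backslash followed by "N" in the gzipped TSV.
NULL_CHAR_FOR_COPY = "\\N"

# Escape backslash, tab and newline so delimiters and row boundaries
# survive the COPY ... ESCAPE load.
def escape_text_for_copy(val)
  val.gsub(/\\|\t|\n/, {"\\" => "\\\\", "\t" => "\\\t", "\n" => "\\\n"})
end

# nil -> \N (NULL), "" stays empty, Hash/Array -> JSON, anything else -> escaped text.
def generate_line_with_delimiter(val_list, delimiter)
  val_list.collect do |val|
    case val
    when nil then NULL_CHAR_FOR_COPY
    when '' then ''
    when Hash, Array then escape_text_for_copy(JSON.generate(val))
    else escape_text_for_copy(val.to_s)
    end
  end.join(delimiter) + "\n"
end

puts generate_line_with_delimiter(["val_a", nil, "", {"foo" => "var"}], "\t").inspect
# => "val_a\t\\N\t\t{\"foo\":\"var\"}\n"
```

This is why the expected mock strings in the test diff below change from runs of bare tabs to explicit `\N` markers: missing columns now load into Redshift as SQL NULL rather than empty strings.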
@@ -15,6 +15,8 @@ class RedshiftOutputTest < Test::Unit::TestCase
  PG::Error.module_eval { attr_accessor :result}
  end

+ MAINTENANCE_FILE_PATH_FOR_TEST = "/tmp/fluentd_redshift_plugin_test_maintenance"
+
  CONFIG_BASE= %[
  aws_key_id test_key_id
  aws_sec_key test_sec_key
@@ -28,6 +30,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  buffer_type memory
  utc
  log_suffix id:5 host:localhost
+ maintenance_file_path #{MAINTENANCE_FILE_PATH_FOR_TEST}
  ]
  CONFIG_CSV= %[
  #{CONFIG_BASE}
@@ -107,6 +110,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  assert_equal "csv", d.instance.file_type
  assert_equal ",", d.instance.delimiter
  assert_equal true, d.instance.utc
+ assert_equal MAINTENANCE_FILE_PATH_FOR_TEST, d.instance.maintenance_file_path
  end
  def test_configure_with_schemaname
  d = create_driver(CONFIG_JSON_WITH_SCHEMA)
@@ -311,7 +315,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_json
- setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+ setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
  setup_tempfile_mock_to_be_closed
  d_json = create_driver(CONFIG_JSON)
  emit_json(d_json)
@@ -319,7 +323,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_json_hash_value
- setup_mocks("val_a\t{\"foo\":\"var\"}\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+ setup_mocks("val_a\t{\"foo\":\"var\"}\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
  d_json = create_driver(CONFIG_JSON)
  d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : {"foo" : "var"}}]} , DEFAULT_TIME)
  d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
@@ -327,7 +331,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_json_array_value
- setup_mocks("val_a\t[\"foo\",\"var\"]\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+ setup_mocks("val_a\t[\"foo\",\"var\"]\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
  d_json = create_driver(CONFIG_JSON)
  d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ["foo", "var"]}]} , DEFAULT_TIME)
  d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
@@ -335,13 +339,20 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_json_including_tab_newline_quote
- setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+ setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
  d_json = create_driver(CONFIG_JSON)
  d_json.emit({"log" => %[{"key_a" : "val_a_with_\\t_tab_\\n_newline", "key_b" : "val_b_with_\\\\_quote"}]} , DEFAULT_TIME)
  d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
  assert_equal true, d_json.run
  end

+ def test_write_with_json_empty_text_value
+ setup_mocks(%[val_a\t\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
+ d_json = create_driver(CONFIG_JSON)
+ d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ""}]} , DEFAULT_TIME)
+ assert_equal true, d_json.run
+ end
+
  def test_write_with_json_no_data
  setup_mocks("")
  d_json = create_driver(CONFIG_JSON)
@@ -351,7 +362,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_json_invalid_one_line
- setup_mocks(%[\t\tval_c\tval_d\t\t\t\t\n])
+ setup_mocks(%[\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
  d_json = create_driver(CONFIG_JSON)
  d_json.emit({"log" => %[}}]}, DEFAULT_TIME)
  d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
@@ -359,7 +370,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_json_no_available_data
- setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n])
+ setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
  d_json = create_driver(CONFIG_JSON)
  d_json.emit(RECORD_JSON_A, DEFAULT_TIME)
  d_json.emit({"log" => %[{"key_o" : "val_o", "key_p" : "val_p"}]}, DEFAULT_TIME)
@@ -367,14 +378,14 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_msgpack
- setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+ setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
  d_msgpack = create_driver(CONFIG_MSGPACK)
  emit_msgpack(d_msgpack)
  assert_equal true, d_msgpack.run
  end

  def test_write_with_msgpack_hash_value
- setup_mocks("val_a\t{\"foo\":\"var\"}\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+ setup_mocks("val_a\t{\"foo\":\"var\"}\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
  d_msgpack = create_driver(CONFIG_MSGPACK)
  d_msgpack.emit({"key_a" => "val_a", "key_b" => {"foo" => "var"}} , DEFAULT_TIME)
  d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
@@ -382,7 +393,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_msgpack_array_value
- setup_mocks("val_a\t[\"foo\",\"var\"]\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+ setup_mocks("val_a\t[\"foo\",\"var\"]\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
  d_msgpack = create_driver(CONFIG_MSGPACK)
  d_msgpack.emit({"key_a" => "val_a", "key_b" => ["foo", "var"]} , DEFAULT_TIME)
  d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
@@ -390,7 +401,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_msgpack_including_tab_newline_quote
- setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+ setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
  d_msgpack = create_driver(CONFIG_MSGPACK)
  d_msgpack.emit({"key_a" => "val_a_with_\t_tab_\n_newline", "key_b" => "val_b_with_\\_quote"} , DEFAULT_TIME)
  d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
@@ -406,7 +417,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_msgpack_no_available_data
- setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n])
+ setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
  d_msgpack = create_driver(CONFIG_MSGPACK)
  d_msgpack.emit(RECORD_MSGPACK_A, DEFAULT_TIME)
  d_msgpack.emit({"key_o" => "val_o", "key_p" => "val_p"}, DEFAULT_TIME)
@@ -466,10 +477,21 @@ class RedshiftOutputTest < Test::Unit::TestCase
  end

  def test_write_with_json_fetch_column_with_schema
- setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n],
+ setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n],
  schema_name: 'test_schema')
  d_json = create_driver(CONFIG_JSON_WITH_SCHEMA)
  emit_json(d_json)
  assert_equal true, d_json.run
  end
+
+ def test_maintenance_mode
+ flexmock(File).should_receive(:exists?).with(MAINTENANCE_FILE_PATH_FOR_TEST).and_return(true)
+
+ d_json = create_driver(CONFIG_JSON)
+ emit_json(d_json)
+ assert_raise(Fluent::RedshiftOutput::MaintenanceError,
+ "Service is in maintenance mode - maintenance_file_path:#{MAINTENANCE_FILE_PATH_FOR_TEST}") {
+ d_json.run
+ }
+ end
  end
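The new `test_maintenance_mode` above stubs `File.exists?` to simulate the flag file. Outside the test harness the same gate can be illustrated with a small standalone sketch (this re-declares the monitor at top level for brevity and uses `File.exist?` rather than the deprecated `File.exists?` alias the plugin code calls):

```ruby
require 'tempfile'

class MaintenanceError < StandardError; end

class MaintenanceMonitor
  def initialize(maintenance_file_path)
    @file_path = maintenance_file_path
  end

  # true only when a path is configured and the flag file currently exists
  def in_maintenance?
    !!(@file_path && File.exist?(@file_path))
  end

  def check_maintenance!
    raise MaintenanceError, "Service is in maintenance mode - maintenance_file_path:#{@file_path}" if in_maintenance?
  end
end

flag = Tempfile.new("maintenance")           # stands in for the configured flag file
monitor = MaintenanceMonitor.new(flag.path)
p monitor.in_maintenance?                    # => true, so write(chunk) would raise here
flag.close!                                  # deleting the flag file ends maintenance mode
monitor.check_maintenance!                   # now passes silently; the buffered chunk can be flushed
```

In the plugin itself the raise happens at the top of `write`, so the chunk stays in fluentd's buffer and is retried until the file is removed or `retry_limit` is exceeded.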
metadata CHANGED
@@ -1,29 +1,35 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-redshift
  version: !ruby/object:Gem::Version
- version: 0.0.6
+ version: 0.1.0
  platform: ruby
  authors:
  - Masashi Miyazaki
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-08-05 00:00:00.000000000 Z
+ date: 2015-08-11 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: fluentd
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ~>
+ - - '>='
  - !ruby/object:Gem::Version
  version: 0.10.0
+ - - <
+ - !ruby/object:Gem::Version
+ version: '2'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ~>
+ - - '>='
  - !ruby/object:Gem::Version
  version: 0.10.0
+ - - <
+ - !ruby/object:Gem::Version
+ version: '2'
  - !ruby/object:Gem::Dependency
  name: aws-sdk-v1
  requirement: !ruby/object:Gem::Requirement