fluent-plugin-redshift-out 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 5b20d3794db1fa89bcd3023c16a7521da561463d
+   data.tar.gz: c07149ac3e5cf6978b7ccd6b72becb7407274d88
+ SHA512:
+   metadata.gz: fea913edcc8f21adf6e7c9b9068b9ae0830fc2813fe0b37640b868dcff1dbdb45fb55566f8a1e9c2e6acf59dfb3a1511105181941523811238aa98a36ddbc56b
+   data.tar.gz: 775d3b77816faa0c010e3f3eef54cec753f2ed12704bc1c89ed890c2db7464d2b206ff25a40e20ce9b63ca82e193fe10691e229c0c86e4ef987c9c33090c8a41
data/VERSION CHANGED
@@ -1 +1 @@
- 0.3.0
+ 0.4.0
fluent-plugin-redshift-out.gemspec CHANGED
@@ -16,8 +16,8 @@ Gem::Specification.new do |gem|
    gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
    gem.require_paths = ["lib"]
 
-   gem.add_dependency "fluentd", "~> 0.10.0"
-   gem.add_dependency "aws-sdk", "~> 1.60"
+   gem.add_dependency "fluentd", [">= 0.10.0", "< 2"]
+   gem.add_dependency "aws-sdk-v1", ">= 1.6.3"
    gem.add_dependency "multi_json", "~> 1.10"
    gem.add_dependency "yajl-ruby", "~> 1.2"
    gem.add_dependency "pg", "~> 0.17.0"
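The fluentd constraint is widened from the 0.10 series to anything below 2, so the
plugin installs alongside fluentd 0.12 and later, and aws-sdk is swapped for
aws-sdk-v1, the renamed v1 gem that can coexist with aws-sdk version 2. An
illustrative Gemfile pairing (versions here are assumptions, not from the package):

  gem "fluentd", "~> 0.12.0"
  gem "fluent-plugin-redshift-out", "0.4.0"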
lib/fluent/plugin/out_redshift-out.rb CHANGED
@@ -4,12 +4,14 @@ module Fluent
    class RedshiftOutput < BufferedOutput
      Fluent::Plugin.register_output('redshift-out', self)
 
+     NULL_CHAR_FOR_COPY = "\\N"
+
      # ignore load table error. (invalid data format)
      IGNORE_REDSHIFT_ERROR_REGEXP = /^ERROR: Load into table '[^']+' failed\./
 
      def initialize
        super
-       require 'aws-sdk'
+       require 'aws-sdk-v1'
        require 'zlib'
        require 'time'
        require 'tempfile'
@@ -22,8 +24,8 @@ class RedshiftOutput < BufferedOutput
 
      config_param :record_log_tag, :string, :default => 'log'
      # s3
-     config_param :aws_key_id, :string, :default => nil
-     config_param :aws_sec_key, :string, :default => nil
+     config_param :aws_key_id, :string, :secret => true, :default => nil
+     config_param :aws_sec_key, :string, :secret => true, :default => nil
      config_param :s3_bucket, :string
      config_param :s3_region, :string, :default => nil
      config_param :path, :string, :default => ""
@@ -34,7 +36,7 @@ class RedshiftOutput < BufferedOutput
      config_param :redshift_port, :integer, :default => 5439
      config_param :redshift_dbname, :string
      config_param :redshift_user, :string
-     config_param :redshift_password, :string
+     config_param :redshift_password, :string, :secret => true
      config_param :redshift_tablename, :string
      config_param :redshift_schemaname, :string, :default => nil
      config_param :redshift_copy_base_options, :string , :default => "FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS"
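The new :secret => true markers make Fluentd mask these three values when it echoes
the parsed configuration into its own log at startup. A minimal, illustrative match
section using only parameters that appear in this diff (all values are placeholders):

  <match redshift.**>
    type redshift-out
    aws_key_id YOUR_KEY_ID            # masked in the startup log
    aws_sec_key YOUR_SECRET_KEY       # masked in the startup log
    s3_bucket your-log-bucket
    redshift_dbname your_db
    redshift_user your_user
    redshift_password your_password   # masked in the startup log
    redshift_tablename your_table
    file_type json
  </match>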
@@ -43,6 +45,8 @@ class RedshiftOutput < BufferedOutput
      # file format
      config_param :file_type, :string, :default => nil # json, tsv, csv, msgpack
      config_param :delimiter, :string, :default => nil
+     # maintenance
+     config_param :maintenance_file_path, :string, :default => nil
      # for debug
      config_param :log_suffix, :string, :default => ''
 
@@ -61,6 +65,8 @@ class RedshiftOutput < BufferedOutput
        }
        @delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
        $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
+       @table_name_with_schema = [@redshift_schemaname, @redshift_tablename].compact.join('.')
+       @maintenance_monitor = MaintenanceMonitor.new(@maintenance_file_path)
      end
 
      def start
@@ -74,6 +80,7 @@ class RedshiftOutput < BufferedOutput
        options[:region] = @s3_region if @s3_region
        @s3 = AWS::S3.new(options)
        @bucket = @s3.buckets[@s3_bucket]
+       @redshift_connection = RedshiftConnection.new(@db_conf)
      end
 
      def format(tag, time, record)
@@ -88,6 +95,7 @@ class RedshiftOutput < BufferedOutput
 
      def write(chunk)
        $log.debug format_log("start creating gz.")
+       @maintenance_monitor.check_maintenance!
 
        # create a gz file
        tmp = Tempfile.new("s3-")
@@ -117,7 +125,7 @@ class RedshiftOutput < BufferedOutput
        # copy gz on s3 to redshift
        s3_uri = "s3://#{@s3_bucket}/#{s3path}"
        credentials = @s3.client.credential_provider.credentials
-       sql = "copy #{table_name_with_schema} from '#{s3_uri}'"
+       sql = "copy #{@table_name_with_schema} from '#{s3_uri}'"
        sql += " CREDENTIALS 'aws_access_key_id=#{credentials[:access_key_id]};aws_secret_access_key=#{credentials[:secret_access_key]}"
        sql += ";token=#{credentials[:session_token]}" if credentials[:session_token]
        sql += "' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
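Pieced together, the statement sent to Redshift looks like the following (all
values illustrative; the token clause is appended only when temporary credentials
carry a session token):

  copy test_table from 's3://your-log-bucket/path/xyz.gz'
  CREDENTIALS 'aws_access_key_id=AKIA...;aws_secret_access_key=...'
  delimiter ',' GZIP ESCAPE FILLRECORD ACCEPTANYDATE TRUNCATECOLUMNS ;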
@@ -125,25 +133,26 @@ class RedshiftOutput < BufferedOutput
        $log.debug format_log("start copying. s3_uri=#{s3_uri}")
 
        begin
-         conn = PG.connect(@db_conf)
-         conn.exec(sql)
+         @redshift_connection.exec(sql)
          $log.info format_log("completed copying to redshift. s3_uri=#{s3_uri}")
-       rescue PG::Error => e
-         $log.error format_log("failed to copy data into redshift. s3_uri=#{s3_uri}"), :error=>e.to_s
-         raise e unless e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
-         return false # for debug
-       ensure
-         conn.close rescue nil if conn
+       rescue RedshiftError => e
+         if e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
+           $log.error format_log("failed to copy data into redshift due to load error. s3_uri=#{s3_uri}"), :error=>e.to_s
+           return false # for debug
+         end
+         raise e
        end
        true # for debug
      end
 
      protected
+
      def format_log(message)
        (@log_suffix and not @log_suffix.empty?) ? "#{message} #{@log_suffix}" : message
      end
 
      private
+
      def json?
        @file_type == 'json'
      end
@@ -165,11 +174,11 @@ class RedshiftOutput < BufferedOutput
 
      def create_gz_file_from_structured_data(dst_file, chunk, delimiter)
        # fetch the table definition from redshift
-       redshift_table_columns = fetch_table_columns
+       redshift_table_columns = @redshift_connection.fetch_table_columns(@redshift_tablename, @redshift_schemaname)
        if redshift_table_columns == nil
          raise "failed to fetch the redshift table definition."
        elsif redshift_table_columns.empty?
-         $log.warn format_log("no table on redshift. table_name=#{table_name_with_schema}")
+         $log.warn format_log("no table on redshift. table_name=#{@table_name_with_schema}")
          return nil
        end
 
@@ -184,12 +193,7 @@ class RedshiftOutput < BufferedOutput
            tsv_text = hash_to_table_text(redshift_table_columns, hash, delimiter)
            gzw.write(tsv_text) if tsv_text and not tsv_text.empty?
          rescue => e
-           if json?
-             $log.error format_log("failed to create table text from json. text=(#{record[@record_log_tag]})"), :error=>e.to_s
-           else
-             $log.error format_log("failed to create table text from msgpack. text=(#{record[@record_log_tag]})"), :error=>e.to_s
-           end
-
+           $log.error format_log("failed to create table text from #{@file_type}. text=(#{record[@record_log_tag]})"), :error=>e.to_s
            $log.error_backtrace
          end
        end
@@ -211,27 +215,6 @@ class RedshiftOutput < BufferedOutput
        end
      end
 
-     def fetch_table_columns
-       begin
-         columns = nil
-         conn = PG.connect(@db_conf)
-         conn.exec(fetch_columns_sql_with_schema) do |result|
-           columns = result.collect{|row| row['column_name']}
-         end
-         columns
-       ensure
-         conn.close rescue nil if conn
-       end
-     end
-
-     def fetch_columns_sql_with_schema
-       @fetch_columns_sql ||= if @redshift_schemaname
-         "select column_name from INFORMATION_SCHEMA.COLUMNS where table_schema = '#{@redshift_schemaname}' and table_name = '#{@redshift_tablename}' order by ordinal_position;"
-       else
-         "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{@redshift_tablename}' order by ordinal_position;"
-       end
-     end
-
      def json_to_hash(json_text)
        return nil if json_text.to_s.empty?
 
@@ -245,18 +228,9 @@ class RedshiftOutput < BufferedOutput
        return "" unless hash
 
        # extract values from hash
-       val_list = redshift_table_columns.collect do |cn|
-         val = hash[cn]
-         val = MultiJson.dump(val) if val.kind_of?(Hash) or val.kind_of?(Array)
-
-         if val.to_s.empty?
-           nil
-         else
-           val.to_s
-         end
-       end
+       val_list = redshift_table_columns.collect {|cn| hash[cn]}
 
-       if val_list.all?{|v| v.nil? or v.empty?}
+       if val_list.all?{|v| v.nil?}
          $log.warn format_log("no data match for table columns on redshift. data=#{hash} table_columns=#{redshift_table_columns}")
          return ""
        end
@@ -265,14 +239,22 @@ class RedshiftOutput < BufferedOutput
      end
 
      def generate_line_with_delimiter(val_list, delimiter)
-       val_list = val_list.collect do |val|
-         if val.nil? or val.empty?
-           ""
+       val_list.collect do |val|
+         case val
+         when nil
+           NULL_CHAR_FOR_COPY
+         when ''
+           ''
+         when Hash, Array
+           escape_text_for_copy(MultiJson.dump(val))
          else
-           val.gsub(/\\/, "\\\\\\").gsub(/\t/, "\\\t").gsub(/\n/, "\\\n") # escape tab, newline and backslash
+           escape_text_for_copy(val.to_s)
          end
-       end
-       val_list.join(delimiter) + "\n"
+       end.join(delimiter) + "\n"
+     end
+
+     def escape_text_for_copy(val)
+       val.gsub(/\\|\t|\n/, {"\\" => "\\\\", "\t" => "\\\t", "\n" => "\\\n"}) # escape tab, newline and backslash
      end
 
      def create_s3path(bucket, path)
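Two behavioral changes worth spelling out. First, a nil column is now emitted as
\N, which Redshift's COPY recognizes as SQL NULL by default for delimited input,
while an empty string stays empty, so the two are no longer conflated (0.3.0 wrote
"" for both). Second, the three chained gsub calls collapse into a single pass over
the string via the replacement-hash form of String#gsub. A quick illustrative check
of that form (standalone Ruby, not part of the package):

  map = {"\\" => "\\\\", "\t" => "\\\t", "\n" => "\\\n"}
  "a\tb\\c".gsub(/\\|\t|\n/, map)
  # => "a\\\tb\\\\c"  (the tab and the backslash each gain a leading backslash)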
@@ -286,12 +268,124 @@ class RedshiftOutput < BufferedOutput
        s3path
      end
 
-     def table_name_with_schema
-       @table_name_with_schema ||= if @redshift_schemaname
-         "#{@redshift_schemaname}.#{@redshift_tablename}"
-       else
-         @redshift_tablename
-       end
+     class RedshiftError < StandardError
+       def initialize(msg)
+         case msg
+         when PG::Error
+           @pg_error = msg
+           super(msg.to_s)
+           set_backtrace(msg.backtrace)
+         else
+           super
+         end
+       end
+
+       attr_accessor :pg_error
+     end
+
+     class RedshiftConnection
+       REDSHIFT_CONNECT_TIMEOUT = 10.0 # 10sec
+
+       def initialize(db_conf)
+         @db_conf = db_conf
+         @connection = nil
+       end
+
+       attr_reader :db_conf
+
+       def fetch_table_columns(table_name, schema_name)
+         columns = nil
+         exec(fetch_columns_sql(table_name, schema_name)) do |result|
+           columns = result.collect{|row| row['column_name']}
+         end
+         columns
+       end
+
+       def exec(sql, &block)
+         conn = @connection
+         conn = create_redshift_connection if conn.nil?
+         if block
+           conn.exec(sql) {|result| block.call(result)}
+         else
+           conn.exec(sql)
+         end
+       rescue PG::Error => e
+         raise RedshiftError.new(e)
+       ensure
+         conn.close if conn && @connection.nil?
+       end
+
+       def connect_start
+         @connection = create_redshift_connection
+       end
+
+       def close
+         @connection.close rescue nil if @connection
+         @connection = nil
+       end
+
+       private
+
+       def create_redshift_connection
+         hostaddr = IPSocket.getaddress(db_conf[:host])
+         db_conf[:hostaddr] = hostaddr
+
+         conn = PG::Connection.connect_start(db_conf)
+         raise RedshiftError.new("Unable to create a new connection.") unless conn
+         if conn.status == PG::CONNECTION_BAD
+           raise RedshiftError.new("Connection failed: %s" % [ conn.error_message ])
+         end
+
+         socket = conn.socket_io
+         poll_status = PG::PGRES_POLLING_WRITING
+         until poll_status == PG::PGRES_POLLING_OK || poll_status == PG::PGRES_POLLING_FAILED
+           case poll_status
+           when PG::PGRES_POLLING_READING
+             IO.select([socket], nil, nil, REDSHIFT_CONNECT_TIMEOUT) or
+               raise RedshiftError.new("Asynchronous connection timed out!(READING)")
+           when PG::PGRES_POLLING_WRITING
+             IO.select(nil, [socket], nil, REDSHIFT_CONNECT_TIMEOUT) or
+               raise RedshiftError.new("Asynchronous connection timed out!(WRITING)")
+           end
+           poll_status = conn.connect_poll
+         end
+
+         unless conn.status == PG::CONNECTION_OK
+           raise RedshiftError, ("Connect failed: %s" % [conn.error_message.to_s.lines.uniq.join(" ")])
+         end
+
+         conn
+       rescue => e
+         conn.close rescue nil if conn
+         raise RedshiftError.new(e) if e.kind_of?(PG::Error)
+         raise e
+       end
+
+       def fetch_columns_sql(table_name, schema_name = nil)
+         sql = "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'"
+         sql << " and table_schema = '#{schema_name}'" if schema_name
+         sql << " order by ordinal_position;"
+         sql
+       end
+     end
+
+     class MaintenanceError < StandardError
+     end
+
+     class MaintenanceMonitor
+       def initialize(maintenance_file_path)
+         @file_path = maintenance_file_path
+       end
+
+       def in_maintenance?
+         !!(@file_path && File.exists?(@file_path))
+       end
+
+       def check_maintenance!
+         if in_maintenance?
+           raise MaintenanceError.new("Service is in maintenance mode - maintenance_file_path:#{@file_path}")
+         end
+       end
      end
    end
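A note on the two new helpers, since they drive the behavior changes above.
RedshiftConnection wraps pg's non-blocking API (connect_start plus connect_poll
driven by IO.select) so a connect attempt is bounded by REDSHIFT_CONNECT_TIMEOUT
instead of hanging a flush thread, and every PG::Error is rewrapped as
RedshiftError. MaintenanceMonitor turns the presence of a file into a switch:
write raises before anything is uploaded, and Fluentd's BufferedOutput keeps the
chunk and retries it later, so events buffer during maintenance instead of being
dropped. An illustrative use of the monitor on its own (the path is arbitrary):

  monitor = Fluent::RedshiftOutput::MaintenanceMonitor.new("/tmp/redshift_maintenance")
  monitor.check_maintenance!    # no-op while the file is absent
  require 'fileutils'
  FileUtils.touch("/tmp/redshift_maintenance")
  monitor.in_maintenance?       # => true
  monitor.check_maintenance!    # raises Fluent::RedshiftOutput::MaintenanceError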
 
test/plugin/test_out_redshift.rb CHANGED
@@ -1,19 +1,22 @@
  require 'test_helper'
 
  require 'fluent/test'
- require 'fluent/plugin/out_redshift'
+ require 'fluent/plugin/out_redshift-out'
  require 'flexmock/test_unit'
  require 'zlib'
 
 
  class RedshiftOutputTest < Test::Unit::TestCase
    def setup
-     require 'aws-sdk'
+     require 'aws-sdk-v1'
      require 'pg'
      require 'csv'
      Fluent::Test.setup
+     PG::Error.module_eval { attr_accessor :result}
    end
 
+   MAINTENANCE_FILE_PATH_FOR_TEST = "/tmp/fluentd_redshift_plugin_test_maintenance"
+
    CONFIG_BASE= %[
      aws_key_id test_key_id
      aws_sec_key test_sec_key
@@ -27,6 +30,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
      buffer_type memory
      utc
      log_suffix id:5 host:localhost
+     maintenance_file_path #{MAINTENANCE_FILE_PATH_FOR_TEST}
    ]
    CONFIG_CSV= %[
      #{CONFIG_BASE}
@@ -106,6 +110,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
      assert_equal "csv", d.instance.file_type
      assert_equal ",", d.instance.delimiter
      assert_equal true, d.instance.utc
+     assert_equal MAINTENANCE_FILE_PATH_FOR_TEST, d.instance.maintenance_file_path
    end
    def test_configure_with_schemaname
      d = create_driver(CONFIG_JSON_WITH_SCHEMA)
@@ -211,54 +216,45 @@ class RedshiftOutputTest < Test::Unit::TestCase
      d_msgpack.run
    end
 
-   class PGConnectionMock
-     def initialize(options = {})
-       @return_keys = options[:return_keys] || ['key_a', 'key_b', 'key_c', 'key_d', 'key_e', 'key_f', 'key_g', 'key_h']
-       @target_schema = options[:schemaname] || nil
-       @target_table = options[:tablename] || 'test_table'
-     end
+   def setup_redshift_connection_mock(options = {})
+     options ||= {}
+     column_names = options[:column_names] || ['key_a', 'key_b', 'key_c', 'key_d', 'key_e', 'key_f', 'key_g', 'key_h']
+     schema_name = options[:schema_name]
+     table_name = options[:table_name] || 'test_table'
+     exec_sql_proc = options[:exec_sql_proc]
 
-     def expected_column_list_query
-       if @target_schema
-         /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_schema = '#{@target_schema}' and table_name = '#{@target_table}'/
+     column_list_query_regex =
+       if schema_name
+         /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}' and table_schema = '#{schema_name}'/
        else
-         /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{@target_table}'/
+         /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'/
        end
-     end
-
-     def expected_copy_query
-       if @target_schema
-         /\Acopy #{@target_schema}.#{@target_table} from/
+     copy_query_regex =
+       if schema_name
+         /\Acopy #{schema_name}.#{table_name} from/
        else
-         /\Acopy #{@target_table} from/
+         /\Acopy #{table_name} from/
        end
-     end
 
-     def exec(sql, &block)
-       if block_given?
-         if sql =~ expected_column_list_query
-           yield @return_keys.collect{|key| {'column_name' => key}}
+     flexmock(Fluent::RedshiftOutput::RedshiftConnection).new_instances do |conn|
+       conn.should_receive(:exec).and_return do |sql, block|
+         if exec_sql_proc
+           exec_sql_proc.call(sql, block)
+         elsif block
+           if sql =~ column_list_query_regex
+             block.call column_names.collect{|key| {'column_name' => key}}
+           else
+             block.call []
+           end
          else
-           yield []
-         end
-       else
-         unless sql =~ expected_copy_query
-           error = PG::Error.new("ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details.")
-           error.result = "ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details."
-           raise error
+           unless sql =~ copy_query_regex
+             error = PG::Error.new("ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details.")
+             error.result = "ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details."
+             raise Fluent::RedshiftOutput::RedshiftError.new(error)
+           end
          end
        end
      end
-
-     def close
-     end
-   end
-
-   def setup_pg_mock
-     # create mock of PG
-     def PG.connect(dbinfo)
-       return PGConnectionMock.new
-     end
    end
 
    def setup_s3_mock(expected_data)
@@ -305,9 +301,10 @@ class RedshiftOutputTest < Test::Unit::TestCase
      flexmock(Tempfile).new_instances.should_receive(:close!).at_least.once
    end
 
-   def setup_mocks(expected_data)
-     setup_pg_mock
-     setup_s3_mock(expected_data) end
+   def setup_mocks(expected_data, options = {})
+     setup_redshift_connection_mock(options)
+     setup_s3_mock(expected_data)
+   end
 
    def test_write_with_csv
      setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
@@ -318,7 +315,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
    end
 
    def test_write_with_json
-     setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
      setup_tempfile_mock_to_be_closed
      d_json = create_driver(CONFIG_JSON)
      emit_json(d_json)
@@ -326,7 +323,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
    end
 
    def test_write_with_json_hash_value
-     setup_mocks("val_a\t{\"foo\":\"var\"}\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+     setup_mocks("val_a\t{\"foo\":\"var\"}\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
      d_json = create_driver(CONFIG_JSON)
      d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : {"foo" : "var"}}]} , DEFAULT_TIME)
      d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
@@ -334,7 +331,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
    end
 
    def test_write_with_json_array_value
-     setup_mocks("val_a\t[\"foo\",\"var\"]\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+     setup_mocks("val_a\t[\"foo\",\"var\"]\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
      d_json = create_driver(CONFIG_JSON)
      d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ["foo", "var"]}]} , DEFAULT_TIME)
      d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
@@ -342,13 +339,20 @@ class RedshiftOutputTest < Test::Unit::TestCase
    end
 
    def test_write_with_json_including_tab_newline_quote
-     setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+     setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
      d_json = create_driver(CONFIG_JSON)
      d_json.emit({"log" => %[{"key_a" : "val_a_with_\\t_tab_\\n_newline", "key_b" : "val_b_with_\\\\_quote"}]} , DEFAULT_TIME)
      d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
      assert_equal true, d_json.run
    end
 
+   def test_write_with_json_empty_text_value
+     setup_mocks(%[val_a\t\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
+     d_json = create_driver(CONFIG_JSON)
+     d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ""}]} , DEFAULT_TIME)
+     assert_equal true, d_json.run
+   end
+
    def test_write_with_json_no_data
      setup_mocks("")
      d_json = create_driver(CONFIG_JSON)
@@ -358,7 +362,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
    end
 
    def test_write_with_json_invalid_one_line
-     setup_mocks(%[\t\tval_c\tval_d\t\t\t\t\n])
+     setup_mocks(%[\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
      d_json = create_driver(CONFIG_JSON)
      d_json.emit({"log" => %[}}]}, DEFAULT_TIME)
      d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
@@ -366,7 +370,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
    end
 
    def test_write_with_json_no_available_data
-     setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n])
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
      d_json = create_driver(CONFIG_JSON)
      d_json.emit(RECORD_JSON_A, DEFAULT_TIME)
      d_json.emit({"log" => %[{"key_o" : "val_o", "key_p" : "val_p"}]}, DEFAULT_TIME)
@@ -374,14 +378,14 @@ class RedshiftOutputTest < Test::Unit::TestCase
    end
 
    def test_write_with_msgpack
-     setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n])
      d_msgpack = create_driver(CONFIG_MSGPACK)
      emit_msgpack(d_msgpack)
      assert_equal true, d_msgpack.run
    end
 
    def test_write_with_msgpack_hash_value
-     setup_mocks("val_a\t{\"foo\":\"var\"}\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+     setup_mocks("val_a\t{\"foo\":\"var\"}\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
      d_msgpack = create_driver(CONFIG_MSGPACK)
      d_msgpack.emit({"key_a" => "val_a", "key_b" => {"foo" => "var"}} , DEFAULT_TIME)
      d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
@@ -389,7 +393,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
    end
 
    def test_write_with_msgpack_array_value
-     setup_mocks("val_a\t[\"foo\",\"var\"]\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+     setup_mocks("val_a\t[\"foo\",\"var\"]\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
      d_msgpack = create_driver(CONFIG_MSGPACK)
      d_msgpack.emit({"key_a" => "val_a", "key_b" => ["foo", "var"]} , DEFAULT_TIME)
      d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
@@ -397,7 +401,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
    end
 
    def test_write_with_msgpack_including_tab_newline_quote
-     setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+     setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n")
      d_msgpack = create_driver(CONFIG_MSGPACK)
      d_msgpack.emit({"key_a" => "val_a_with_\t_tab_\n_newline", "key_b" => "val_b_with_\\_quote"} , DEFAULT_TIME)
      d_msgpack.emit(RECORD_MSGPACK_B, DEFAULT_TIME)
@@ -413,7 +417,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
    end
 
    def test_write_with_msgpack_no_available_data
-     setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n])
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n])
      d_msgpack = create_driver(CONFIG_MSGPACK)
      d_msgpack.emit(RECORD_MSGPACK_A, DEFAULT_TIME)
      d_msgpack.emit({"key_o" => "val_o", "key_p" => "val_p"}, DEFAULT_TIME)
@@ -421,38 +425,21 @@ class RedshiftOutputTest < Test::Unit::TestCase
    end
 
    def test_write_redshift_connection_error
-     def PG.connect(dbinfo)
-       return Class.new do
-         def initialize(return_keys=[]); end
-         def exec(sql)
-           raise PG::Error, "redshift connection error"
-         end
-         def close; end
-       end.new
-     end
-     setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
-
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                 exec_sql_proc: Proc.new {|sql, block| raise Fluent::RedshiftOutput::RedshiftError, "redshift connection error" })
      d_csv = create_driver
      emit_csv(d_csv)
-     assert_raise(PG::Error) {
+     assert_raise(Fluent::RedshiftOutput::RedshiftError) {
        d_csv.run
      }
    end
 
    def test_write_redshift_load_error
-     PG::Error.module_eval { attr_accessor :result}
-     def PG.connect(dbinfo)
-       return Class.new do
-         def initialize(return_keys=[]); end
-         def exec(sql)
-           error = PG::Error.new("ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details.")
-           error.result = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
-           raise error
-         end
-         def close; end
-       end.new
-     end
-     setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                 exec_sql_proc: Proc.new {|sql, block|
+                   msg = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
+                   raise Fluent::RedshiftOutput::RedshiftError.new(msg)
+                 })
 
      d_csv = create_driver
      emit_csv(d_csv)
@@ -460,36 +447,19 @@ class RedshiftOutputTest < Test::Unit::TestCase
    end
 
    def test_write_with_json_redshift_connection_error
-     def PG.connect(dbinfo)
-       return Class.new do
-         def initialize(return_keys=[]); end
-         def exec(sql, &block)
-           error = PG::Error.new("redshift connection error")
-           raise error
-         end
-         def close; end
-       end.new
-     end
-     setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                 exec_sql_proc: Proc.new {|sql, block| raise Fluent::RedshiftOutput::RedshiftError.new("redshift connection error")})
 
      d_json = create_driver(CONFIG_JSON)
      emit_json(d_json)
-     assert_raise(PG::Error) {
+     assert_raise(Fluent::RedshiftOutput::RedshiftError) {
        d_json.run
      }
    end
 
    def test_write_with_json_no_table_on_redshift
-     def PG.connect(dbinfo)
-       return Class.new do
-         def initialize(return_keys=[]); end
-         def exec(sql, &block)
-           yield [] if block_given?
-         end
-         def close; end
-       end.new
-     end
-     setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                 exec_sql_proc: Proc.new {|sql, block| block.call [] if block })
 
      d_json = create_driver(CONFIG_JSON)
      emit_json(d_json)
@@ -497,15 +467,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
    end
 
    def test_write_with_json_failed_to_get_columns
-     def PG.connect(dbinfo)
-       return Class.new do
-         def initialize(return_keys=[]); end
-         def exec(sql, &block)
-         end
-         def close; end
-       end.new
-     end
-     setup_s3_mock("")
+     setup_mocks("", exec_sql_proc: Proc.new {|sql, block| nil})
 
      d_json = create_driver(CONFIG_JSON)
      emit_json(d_json)
@@ -515,12 +477,21 @@ class RedshiftOutputTest < Test::Unit::TestCase
    end
 
    def test_write_with_json_fetch_column_with_schema
-     def PG.connect(dbinfo)
-       return PGConnectionMock.new(:schemaname => 'test_schema')
-     end
-     setup_s3_mock(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+     setup_mocks(%[val_a\tval_b\t\\N\t\\N\t\\N\t\\N\t\\N\t\\N\n\\N\t\\N\tval_c\tval_d\t\\N\t\\N\t\\N\t\\N\n],
+                 schema_name: 'test_schema')
      d_json = create_driver(CONFIG_JSON_WITH_SCHEMA)
      emit_json(d_json)
      assert_equal true, d_json.run
    end
+
+   def test_maintenance_mode
+     flexmock(File).should_receive(:exists?).with(MAINTENANCE_FILE_PATH_FOR_TEST).and_return(true)
+
+     d_json = create_driver(CONFIG_JSON)
+     emit_json(d_json)
+     assert_raise(Fluent::RedshiftOutput::MaintenanceError,
+                  "Service is in maintenance mode - maintenance_file_path:#{MAINTENANCE_FILE_PATH_FOR_TEST}") {
+       d_json.run
+     }
+   end
  end
metadata CHANGED
@@ -1,142 +1,131 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-redshift-out
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
5
- prerelease:
4
+ version: 0.4.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Naohiro Sakuma
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2015-01-06 00:00:00.000000000 Z
11
+ date: 2015-12-28 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: fluentd
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ~>
17
+ - - ">="
20
18
  - !ruby/object:Gem::Version
21
19
  version: 0.10.0
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '2'
22
23
  type: :runtime
23
24
  prerelease: false
24
25
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
26
  requirements:
27
- - - ~>
27
+ - - ">="
28
28
  - !ruby/object:Gem::Version
29
29
  version: 0.10.0
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '2'
30
33
  - !ruby/object:Gem::Dependency
31
- name: aws-sdk
34
+ name: aws-sdk-v1
32
35
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
36
  requirements:
35
- - - ~>
37
+ - - ">="
36
38
  - !ruby/object:Gem::Version
37
- version: '1.60'
39
+ version: 1.6.3
38
40
  type: :runtime
39
41
  prerelease: false
40
42
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
43
  requirements:
43
- - - ~>
44
+ - - ">="
44
45
  - !ruby/object:Gem::Version
45
- version: '1.60'
46
+ version: 1.6.3
46
47
  - !ruby/object:Gem::Dependency
47
48
  name: multi_json
48
49
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
50
  requirements:
51
- - - ~>
51
+ - - "~>"
52
52
  - !ruby/object:Gem::Version
53
53
  version: '1.10'
54
54
  type: :runtime
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
57
  requirements:
59
- - - ~>
58
+ - - "~>"
60
59
  - !ruby/object:Gem::Version
61
60
  version: '1.10'
62
61
  - !ruby/object:Gem::Dependency
63
62
  name: yajl-ruby
64
63
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
64
  requirements:
67
- - - ~>
65
+ - - "~>"
68
66
  - !ruby/object:Gem::Version
69
67
  version: '1.2'
70
68
  type: :runtime
71
69
  prerelease: false
72
70
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
71
  requirements:
75
- - - ~>
72
+ - - "~>"
76
73
  - !ruby/object:Gem::Version
77
74
  version: '1.2'
78
75
  - !ruby/object:Gem::Dependency
79
76
  name: pg
80
77
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
78
  requirements:
83
- - - ~>
79
+ - - "~>"
84
80
  - !ruby/object:Gem::Version
85
81
  version: 0.17.0
86
82
  type: :runtime
87
83
  prerelease: false
88
84
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
85
  requirements:
91
- - - ~>
86
+ - - "~>"
92
87
  - !ruby/object:Gem::Version
93
88
  version: 0.17.0
94
89
  - !ruby/object:Gem::Dependency
95
90
  name: rake
96
91
  requirement: !ruby/object:Gem::Requirement
97
- none: false
98
92
  requirements:
99
- - - ! '>='
93
+ - - ">="
100
94
  - !ruby/object:Gem::Version
101
95
  version: '0'
102
96
  type: :development
103
97
  prerelease: false
104
98
  version_requirements: !ruby/object:Gem::Requirement
105
- none: false
106
99
  requirements:
107
- - - ! '>='
100
+ - - ">="
108
101
  - !ruby/object:Gem::Version
109
102
  version: '0'
110
103
  - !ruby/object:Gem::Dependency
111
104
  name: simplecov
112
105
  requirement: !ruby/object:Gem::Requirement
113
- none: false
114
106
  requirements:
115
- - - ! '>='
107
+ - - ">="
116
108
  - !ruby/object:Gem::Version
117
109
  version: 0.5.4
118
110
  type: :development
119
111
  prerelease: false
120
112
  version_requirements: !ruby/object:Gem::Requirement
121
- none: false
122
113
  requirements:
123
- - - ! '>='
114
+ - - ">="
124
115
  - !ruby/object:Gem::Version
125
116
  version: 0.5.4
126
117
  - !ruby/object:Gem::Dependency
127
118
  name: flexmock
128
119
  requirement: !ruby/object:Gem::Requirement
129
- none: false
130
120
  requirements:
131
- - - ! '>='
121
+ - - ">="
132
122
  - !ruby/object:Gem::Version
133
123
  version: 1.3.1
134
124
  type: :development
135
125
  prerelease: false
136
126
  version_requirements: !ruby/object:Gem::Requirement
137
- none: false
138
127
  requirements:
139
- - - ! '>='
128
+ - - ">="
140
129
  - !ruby/object:Gem::Version
141
130
  version: 1.3.1
142
131
  description: Amazon Redshift output plugin for Fluentd
@@ -146,7 +135,7 @@ executables: []
  extensions: []
  extra_rdoc_files: []
  files:
- - .gitignore
+ - ".gitignore"
  - Gemfile
  - README.md
  - Rakefile
@@ -157,27 +146,26 @@ files:
  - test/test_helper.rb
  homepage: https://github.com/sakuma/fluent-plugin-redshift-out
  licenses: []
+ metadata: {}
  post_install_message:
  rdoc_options: []
  require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
-   none: false
    requirements:
-   - - ! '>='
+   - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
-   none: false
    requirements:
-   - - ! '>='
+   - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 1.8.23
+ rubygems_version: 2.4.5.1
  signing_key:
- specification_version: 3
+ specification_version: 4
  summary: Amazon Redshift output plugin for Fluentd
  test_files:
  - test/plugin/test_out_redshift.rb