fluent-plugin-redshift 0.0.5 → 0.0.6

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: dd0f2cbfbaef5ac00d76a43d13bcdb2bcef77d83
-  data.tar.gz: 6071d3bb18d17fbf424fdced42febfc07adfe114
+  metadata.gz: ae21e38c3e70d5c36c0c52bcf3e38183756e4534
+  data.tar.gz: a2ebf5bc56f51b9c5f4dbd55fcf07499d9bc9bd8
 SHA512:
-  metadata.gz: 52dbbd84055f7e2eeba6fc33199544d247684179fb3a7ccf29400005e8b814f4146b1a075515384defdf011615de1c8f3140c1c4e456f6af7a7779a757056fdf
-  data.tar.gz: 34174f832c9ddcac43b96bd667cf872ea4403b453576f26921bb7fecf14bfb385c913ed34e74b115dca2184a306e14ae612e5d822a91cf0a3fdf1d1de020eb3c
+  metadata.gz: 064a44a6ed9086b1aef44eded321e984a9ccb8afc530e46b03ad99a77b2a2b384c24900ceea220f92c96871c1db18c9503910aff50ccc2b36d042e3d6077d80d
+  data.tar.gz: c85eb3832be2f660f8974b535e95430a51c2a13556a97d3f417eec816a05eb78c015ba079080847a46f48b3f6350cbddd3a06bae12464bcdcba4a26776e28390
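These checksums cover the metadata.gz and data.tar.gz entries inside the packaged .gem archive (RubyGems verifies them automatically on install). A minimal sketch of reproducing them by hand, assuming the gem has been downloaded as fluent-plugin-redshift-0.0.6.gem:

```ruby
require 'digest'
require 'rubygems/package'

# Walk the .gem (a tar archive) and hash the two entries listed in
# checksums.yaml; the output should match the "+" lines above.
File.open('fluent-plugin-redshift-0.0.6.gem', 'rb') do |io|
  Gem::Package::TarReader.new(io) do |tar|
    tar.each do |entry|
      next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
      data = entry.read
      puts "#{entry.full_name} SHA1:   #{Digest::SHA1.hexdigest(data)}"
      puts "#{entry.full_name} SHA512: #{Digest::SHA512.hexdigest(data)}"
    end
  end
end
```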
data/VERSION CHANGED
@@ -1 +1 @@
-0.0.5
+0.0.6
lib/fluent/plugin/out_redshift.rb CHANGED
@@ -59,7 +59,8 @@ class RedshiftOutput < BufferedOutput
     }
     @delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
     $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
-    @copy_sql_template = "copy #{table_name_with_schema} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
+    @table_name_with_schema = [@redshift_schemaname, @redshift_tablename].compact.join('.')
+    @copy_sql_template = "copy #{@table_name_with_schema} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
   end
 
   def start
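The refactor computes the schema-qualified table name once, in configure, and memoizes it in @table_name_with_schema; compact.join('.') drops a nil @redshift_schemaname instead of branching. A quick illustration with invented values:

```ruby
# Array#compact removes nil, so the join degrades gracefully without an if/else.
['public', 'apache_log'].compact.join('.')  # => "public.apache_log"
[nil, 'apache_log'].compact.join('.')       # => "apache_log"

# The COPY template above then renders (values invented for the example) as:
# copy public.apache_log from '%s' CREDENTIALS '...' delimiter '\t' GZIP ESCAPE ...;
```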
@@ -72,6 +73,7 @@ class RedshiftOutput < BufferedOutput
     options[:s3_endpoint] = @s3_endpoint if @s3_endpoint
     @s3 = AWS::S3.new(options)
     @bucket = @s3.buckets[@s3_bucket]
+    @redshift_connection = RedshiftConnection.new(@db_conf)
   end
 
   def format(tag, time, record)
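Note that RedshiftConnection.new only stores the connection parameters here; no socket is opened until exec or connect_start is called. The shape of @db_conf is not shown in this diff, but it is built in configure from the plugin's parameters and passed straight to pg, so a plausible sketch (key names are assumptions) is:

```ruby
# Assumed shape of @db_conf: standard pg connection options built in
# #configure from the <match> parameters. Not copied from this diff.
@db_conf = {
  host:     @redshift_host,
  port:     @redshift_port,
  dbname:   @redshift_dbname,
  user:     @redshift_user,
  password: @redshift_password
}
@redshift_connection = RedshiftConnection.new(@db_conf) # connects lazily
```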
@@ -118,25 +120,26 @@ class RedshiftOutput < BufferedOutput
     $log.debug format_log("start copying. s3_uri=#{s3_uri}")
 
     begin
-      conn = PG.connect(@db_conf)
-      conn.exec(sql)
+      @redshift_connection.exec(sql)
       $log.info format_log("completed copying to redshift. s3_uri=#{s3_uri}")
-    rescue PG::Error => e
-      $log.error format_log("failed to copy data into redshift. s3_uri=#{s3_uri}"), :error=>e.to_s
-      raise e unless e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
-      return false # for debug
-    ensure
-      conn.close rescue nil if conn
+    rescue RedshiftError => e
+      if e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
+        $log.error format_log("failed to copy data into redshift due to load error. s3_uri=#{s3_uri}"), :error=>e.to_s
+        return false # for debug
+      end
+      raise e
     end
     true # for debug
   end
 
   protected
+
   def format_log(message)
     (@log_suffix and not @log_suffix.empty?) ? "#{message} #{@log_suffix}" : message
   end
 
   private
+
   def json?
     @file_type == 'json'
   end
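The rewritten rescue inverts the old flow: errors matching IGNORE_REDSHIFT_ERROR_REGEXP (bad input rows, where a retry would fail identically) are logged and swallowed so the buffer chunk is dropped, while everything else, such as connection failures, propagates to Fluentd's retry machinery. A condensed sketch of that gate; the regexp body is an assumed stand-in, not the plugin's actual constant:

```ruby
# Assumed stand-in for IGNORE_REDSHIFT_ERROR_REGEXP, for illustration only.
IGNORE_REGEXP = /Load into table '[^']+' failed/

def copy_chunk(connection, sql)
  connection.exec(sql)
  true
rescue Fluent::RedshiftOutput::RedshiftError => e
  # Bad records: retrying the same chunk would fail the same way, so drop it.
  return false if e.to_s =~ IGNORE_REGEXP
  # Connection/auth errors: re-raise so Fluentd retries the chunk later.
  raise
end
```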
@@ -158,11 +161,11 @@ class RedshiftOutput < BufferedOutput
 
   def create_gz_file_from_structured_data(dst_file, chunk, delimiter)
     # fetch the table definition from redshift
-    redshift_table_columns = fetch_table_columns
+    redshift_table_columns = @redshift_connection.fetch_table_columns(@redshift_tablename, @redshift_schemaname)
     if redshift_table_columns == nil
       raise "failed to fetch the redshift table definition."
     elsif redshift_table_columns.empty?
-      $log.warn format_log("no table on redshift. table_name=#{table_name_with_schema}")
+      $log.warn format_log("no table on redshift. table_name=#{@table_name_with_schema}")
       return nil
     end
 
@@ -204,27 +207,6 @@ class RedshiftOutput < BufferedOutput
     end
   end
 
-  def fetch_table_columns
-    begin
-      columns = nil
-      conn = PG.connect(@db_conf)
-      conn.exec(fetch_columns_sql_with_schema) do |result|
-        columns = result.collect{|row| row['column_name']}
-      end
-      columns
-    ensure
-      conn.close rescue nil if conn
-    end
-  end
-
-  def fetch_columns_sql_with_schema
-    @fetch_columns_sql ||= if @redshift_schemaname
-      "select column_name from INFORMATION_SCHEMA.COLUMNS where table_schema = '#{@redshift_schemaname}' and table_name = '#{@redshift_tablename}' order by ordinal_position;"
-    else
-      "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{@redshift_tablename}' order by ordinal_position;"
-    end
-  end
-
   def json_to_hash(json_text)
     return nil if json_text.to_s.empty?
 
@@ -279,14 +261,108 @@ class RedshiftOutput < BufferedOutput
     s3path
   end
 
-  def table_name_with_schema
-    @table_name_with_schema ||= if @redshift_schemaname
-      "#{@redshift_schemaname}.#{@redshift_tablename}"
-    else
-      @redshift_tablename
-    end
+  class RedshiftError < StandardError
+    def initialize(msg)
+      case msg
+      when PG::Error
+        @pg_error = msg
+        super(msg.to_s)
+        set_backtrace(msg.backtrace)
+      else
+        super
+      end
+    end
+
+    attr_accessor :pg_error
+  end
+
+  class RedshiftConnection
+    REDSHIFT_CONNECT_TIMEOUT = 10.0 # 10sec
+
+    def initialize(db_conf)
+      @db_conf = db_conf
+      @connection = nil
+    end
+
+    attr_reader :db_conf
+
+    def fetch_table_columns(table_name, schema_name)
+      columns = nil
+      exec(fetch_columns_sql(table_name, schema_name)) do |result|
+        columns = result.collect{|row| row['column_name']}
+      end
+      columns
+    end
+
+    def exec(sql, &block)
+      conn = @connection
+      conn = create_redshift_connection if conn.nil?
+      if block
+        conn.exec(sql) {|result| block.call(result)}
+      else
+        conn.exec(sql)
+      end
+    rescue PG::Error => e
+      raise RedshiftError.new(e)
+    ensure
+      conn.close if conn && @connection.nil?
+    end
+
+    def connect_start
+      @connection = create_redshift_connection
+    end
+
+    def close
+      @connection.close rescue nil if @connection
+      @connection = nil
+    end
+
+    private
+
+    def create_redshift_connection
+      hostaddr = IPSocket.getaddress(db_conf[:host])
+      db_conf[:hostaddr] = hostaddr
+
+      conn = PG::Connection.connect_start(db_conf)
+      raise RedshiftError.new("Unable to create a new connection.") unless conn
+      if conn.status == PG::CONNECTION_BAD
+        raise RedshiftError.new("Connection failed: %s" % [ conn.error_message ])
+      end
+
+      socket = conn.socket_io
+      poll_status = PG::PGRES_POLLING_WRITING
+      until poll_status == PG::PGRES_POLLING_OK || poll_status == PG::PGRES_POLLING_FAILED
+        case poll_status
+        when PG::PGRES_POLLING_READING
+          IO.select([socket], nil, nil, REDSHIFT_CONNECT_TIMEOUT) or
+            raise RedshiftError.new("Asynchronous connection timed out!(READING)")
+        when PG::PGRES_POLLING_WRITING
+          IO.select(nil, [socket], nil, REDSHIFT_CONNECT_TIMEOUT) or
+            raise RedshiftError.new("Asynchronous connection timed out!(WRITING)")
+        end
+        poll_status = conn.connect_poll
+      end
+
+      unless conn.status == PG::CONNECTION_OK
+        raise RedshiftError, ("Connect failed: %s" % [conn.error_message.to_s.lines.uniq.join(" ")])
+      end
+
+      conn
+    rescue => e
+      conn.close rescue nil if conn
+      raise RedshiftError.new(e) if e.kind_of?(PG::Error)
+      raise e
+    end
+
+    def fetch_columns_sql(table_name, schema_name = nil)
+      sql = "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'"
+      sql << " and table_schema = '#{schema_name}'" if schema_name
+      sql << " order by ordinal_position;"
+      sql
+    end
   end
 end
 
 
+
 end
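Taken together, the new RedshiftConnection centralizes connection handling: exec opens a one-shot connection when none has been cached (and closes it in the ensure clause), connect_start/close manage a persistent one, every PG::Error is rewrapped as RedshiftError, and create_redshift_connection drives pg's non-blocking connect_start/connect_poll API so a hung endpoint fails after REDSHIFT_CONNECT_TIMEOUT instead of blocking the Fluentd worker. A minimal usage sketch with invented connection parameters:

```ruby
# All connection values below are invented for the example.
conn = Fluent::RedshiftOutput::RedshiftConnection.new(
  host: 'example.abc123.us-east-1.redshift.amazonaws.com',
  port: 5439, dbname: 'logs', user: 'fluent', password: 'secret'
)

begin
  columns = conn.fetch_table_columns('apache_log', 'public') # one-shot connection
  conn.connect_start                                         # or keep one open
  conn.exec("copy public.apache_log from 's3://...' ...")    # raises RedshiftError
rescue Fluent::RedshiftOutput::RedshiftError => e
  warn e.message  # the original PG::Error, if any, is kept in e.pg_error
ensure
  conn.close
end
```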
test/plugin/test_out_redshift.rb CHANGED
@@ -12,6 +12,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
     require 'pg'
     require 'csv'
     Fluent::Test.setup
+    PG::Error.module_eval { attr_accessor :result }
   end
 
   CONFIG_BASE= %[
@@ -211,54 +212,45 @@ class RedshiftOutputTest < Test::Unit::TestCase
     d_msgpack.run
   end
 
-  class PGConnectionMock
-    def initialize(options = {})
-      @return_keys = options[:return_keys] || ['key_a', 'key_b', 'key_c', 'key_d', 'key_e', 'key_f', 'key_g', 'key_h']
-      @target_schema = options[:schemaname] || nil
-      @target_table = options[:tablename] || 'test_table'
-    end
+  def setup_redshift_connection_mock(options = {})
+    options ||= {}
+    column_names = options[:column_names] || ['key_a', 'key_b', 'key_c', 'key_d', 'key_e', 'key_f', 'key_g', 'key_h']
+    schema_name = options[:schema_name]
+    table_name = options[:table_name] || 'test_table'
+    exec_sql_proc = options[:exec_sql_proc]
 
-    def expected_column_list_query
-      if @target_schema
-        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_schema = '#{@target_schema}' and table_name = '#{@target_table}'/
+    column_list_query_regex =
+      if schema_name
+        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}' and table_schema = '#{schema_name}'/
       else
-        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{@target_table}'/
+        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'/
       end
-    end
-
-    def expected_copy_query
-      if @target_schema
-        /\Acopy #{@target_schema}.#{@target_table} from/
+    copy_query_regex =
+      if schema_name
+        /\Acopy #{schema_name}.#{table_name} from/
       else
-        /\Acopy #{@target_table} from/
+        /\Acopy #{table_name} from/
       end
-    end
 
-    def exec(sql, &block)
-      if block_given?
-        if sql =~ expected_column_list_query
-          yield @return_keys.collect{|key| {'column_name' => key}}
+    flexmock(Fluent::RedshiftOutput::RedshiftConnection).new_instances do |conn|
+      conn.should_receive(:exec).and_return do |sql, block|
+        if exec_sql_proc
+          exec_sql_proc.call(sql, block)
+        elsif block
+          if sql =~ column_list_query_regex
+            block.call column_names.collect{|key| {'column_name' => key}}
+          else
+            block.call []
+          end
         else
-          yield []
-        end
-      else
-        unless sql =~ expected_copy_query
-          error = PG::Error.new("ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details.")
-          error.result = "ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details."
-          raise error
+          unless sql =~ copy_query_regex
+            error = PG::Error.new("ERROR: Load into table '#{table_name}' failed. Check 'stl_load_errors' system table for details.")
+            error.result = "ERROR: Load into table '#{table_name}' failed. Check 'stl_load_errors' system table for details."
+            raise Fluent::RedshiftOutput::RedshiftError.new(error)
+          end
         end
       end
     end
-
-    def close
-    end
-  end
-
-  def setup_pg_mock
-    # create mock of PG
-    def PG.connect(dbinfo)
-      return PGConnectionMock.new
-    end
   end
 
   def setup_s3_mock(expected_data)
@@ -305,9 +297,10 @@ class RedshiftOutputTest < Test::Unit::TestCase
     flexmock(Tempfile).new_instances.should_receive(:close!).at_least.once
   end
 
-  def setup_mocks(expected_data)
-    setup_pg_mock
-    setup_s3_mock(expected_data) end
+  def setup_mocks(expected_data, options = {})
+    setup_redshift_connection_mock(options)
+    setup_s3_mock(expected_data)
+  end
 
   def test_write_with_csv
     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
@@ -421,38 +414,21 @@ class RedshiftOutputTest < Test::Unit::TestCase
   end
 
   def test_write_redshift_connection_error
-    def PG.connect(dbinfo)
-      return Class.new do
-        def initialize(return_keys=[]); end
-        def exec(sql)
-          raise PG::Error, "redshift connection error"
-        end
-        def close; end
-      end.new
-    end
-    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
-
+    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                exec_sql_proc: Proc.new {|sql, block| raise Fluent::RedshiftOutput::RedshiftError, "redshift connection error" })
     d_csv = create_driver
     emit_csv(d_csv)
-    assert_raise(PG::Error) {
+    assert_raise(Fluent::RedshiftOutput::RedshiftError) {
       d_csv.run
     }
   end
 
   def test_write_redshift_load_error
-    PG::Error.module_eval { attr_accessor :result }
-    def PG.connect(dbinfo)
-      return Class.new do
-        def initialize(return_keys=[]); end
-        def exec(sql)
-          error = PG::Error.new("ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details.")
-          error.result = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
-          raise error
-        end
-        def close; end
-      end.new
-    end
-    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                exec_sql_proc: Proc.new {|sql, block|
+                  msg = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
+                  raise Fluent::RedshiftOutput::RedshiftError.new(msg)
+                })
 
     d_csv = create_driver
     emit_csv(d_csv)
@@ -460,36 +436,19 @@ class RedshiftOutputTest < Test::Unit::TestCase
   end
 
   def test_write_with_json_redshift_connection_error
-    def PG.connect(dbinfo)
-      return Class.new do
-        def initialize(return_keys=[]); end
-        def exec(sql, &block)
-          error = PG::Error.new("redshift connection error")
-          raise error
-        end
-        def close; end
-      end.new
-    end
-    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                exec_sql_proc: Proc.new {|sql, block| raise Fluent::RedshiftOutput::RedshiftError.new("redshift connection error")})
 
     d_json = create_driver(CONFIG_JSON)
     emit_json(d_json)
-    assert_raise(PG::Error) {
+    assert_raise(Fluent::RedshiftOutput::RedshiftError) {
       d_json.run
     }
   end
 
   def test_write_with_json_no_table_on_redshift
-    def PG.connect(dbinfo)
-      return Class.new do
-        def initialize(return_keys=[]); end
-        def exec(sql, &block)
-          yield [] if block_given?
-        end
-        def close; end
-      end.new
-    end
-    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                exec_sql_proc: Proc.new {|sql, block| block.call [] if block })
 
     d_json = create_driver(CONFIG_JSON)
     emit_json(d_json)
@@ -497,15 +456,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
   end
 
   def test_write_with_json_failed_to_get_columns
-    def PG.connect(dbinfo)
-      return Class.new do
-        def initialize(return_keys=[]); end
-        def exec(sql, &block)
-        end
-        def close; end
-      end.new
-    end
-    setup_s3_mock("")
+    setup_mocks("", exec_sql_proc: Proc.new {|sql, block| nil})
 
     d_json = create_driver(CONFIG_JSON)
     emit_json(d_json)
@@ -515,10 +466,8 @@ class RedshiftOutputTest < Test::Unit::TestCase
   end
 
   def test_write_with_json_fetch_column_with_schema
-    def PG.connect(dbinfo)
-      return PGConnectionMock.new(:schemaname => 'test_schema')
-    end
-    setup_s3_mock(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+    setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n],
+                schema_name: 'test_schema')
     d_json = create_driver(CONFIG_JSON_WITH_SCHEMA)
     emit_json(d_json)
     assert_equal true, d_json.run
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-redshift
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 0.0.6
 platform: ruby
 authors:
 - Masashi Miyazaki
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-05-27 00:00:00.000000000 Z
+date: 2015-08-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd