fluent-plugin-redshift 0.0.5 → 0.0.6

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: dd0f2cbfbaef5ac00d76a43d13bcdb2bcef77d83
-  data.tar.gz: 6071d3bb18d17fbf424fdced42febfc07adfe114
+  metadata.gz: ae21e38c3e70d5c36c0c52bcf3e38183756e4534
+  data.tar.gz: a2ebf5bc56f51b9c5f4dbd55fcf07499d9bc9bd8
 SHA512:
-  metadata.gz: 52dbbd84055f7e2eeba6fc33199544d247684179fb3a7ccf29400005e8b814f4146b1a075515384defdf011615de1c8f3140c1c4e456f6af7a7779a757056fdf
-  data.tar.gz: 34174f832c9ddcac43b96bd667cf872ea4403b453576f26921bb7fecf14bfb385c913ed34e74b115dca2184a306e14ae612e5d822a91cf0a3fdf1d1de020eb3c
+  metadata.gz: 064a44a6ed9086b1aef44eded321e984a9ccb8afc530e46b03ad99a77b2a2b384c24900ceea220f92c96871c1db18c9503910aff50ccc2b36d042e3d6077d80d
+  data.tar.gz: c85eb3832be2f660f8974b535e95430a51c2a13556a97d3f417eec816a05eb78c015ba079080847a46f48b3f6350cbddd3a06bae12464bcdcba4a26776e28390
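The checksums above cover the two archives inside the .gem file: metadata.gz (the gemspec) and data.tar.gz (the packaged files). As a hedged aside, the values can be reproduced with Ruby's standard Digest library once the gem is unpacked; the paths below assume the standard gem package layout:

    require 'digest'

    # Archive names come from the standard gem layout (tar -xf foo.gem).
    %w[metadata.gz data.tar.gz].each do |archive|
      next unless File.exist?(archive)
      puts "#{archive} SHA1:   #{Digest::SHA1.file(archive).hexdigest}"
      puts "#{archive} SHA512: #{Digest::SHA512.file(archive).hexdigest}"
    end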
data/VERSION CHANGED
@@ -1 +1 @@
-0.0.5
+0.0.6
lib/fluent/plugin/out_redshift.rb CHANGED
@@ -59,7 +59,8 @@ class RedshiftOutput < BufferedOutput
     }
     @delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
     $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
-    @copy_sql_template = "copy #{table_name_with_schema} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
+    @table_name_with_schema = [@redshift_schemaname, @redshift_tablename].compact.join('.')
+    @copy_sql_template = "copy #{@table_name_with_schema} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
   end

   def start
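The two added lines replace a lazily computed table_name_with_schema method (removed further down in this diff) with an eager instance variable: [schema, table].compact.join('.') drops a nil schema before joining, so the result is "schema.table" or plain "table". A minimal sketch of the pattern, with illustrative values standing in for the plugin's configuration:

    schema_name = 'analytics'   # nil when no schema is configured
    table_name  = 'events'

    # compact removes a nil schema; join adds the dot only when needed
    puts [schema_name, table_name].compact.join('.')  # => "analytics.events"
    puts [nil, table_name].compact.join('.')          # => "events"

The %s placeholders left in the COPY template are presumably filled in later, at write time, with the S3 source path and the AWS secret key via String#%.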
@@ -72,6 +73,7 @@ class RedshiftOutput < BufferedOutput
     options[:s3_endpoint] = @s3_endpoint if @s3_endpoint
     @s3 = AWS::S3.new(options)
     @bucket = @s3.buckets[@s3_bucket]
+    @redshift_connection = RedshiftConnection.new(@db_conf)
   end

   def format(tag, time, record)
@@ -118,25 +120,26 @@ class RedshiftOutput < BufferedOutput
     $log.debug format_log("start copying. s3_uri=#{s3_uri}")

     begin
-      conn = PG.connect(@db_conf)
-      conn.exec(sql)
+      @redshift_connection.exec(sql)
       $log.info format_log("completed copying to redshift. s3_uri=#{s3_uri}")
-    rescue PG::Error => e
-      $log.error format_log("failed to copy data into redshift. s3_uri=#{s3_uri}"), :error=>e.to_s
-      raise e unless e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
-      return false # for debug
-    ensure
-      conn.close rescue nil if conn
+    rescue RedshiftError => e
+      if e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
+        $log.error format_log("failed to copy data into redshift due to load error. s3_uri=#{s3_uri}"), :error=>e.to_s
+        return false # for debug
+      end
+      raise e
     end
     true # for debug
   end

   protected
+
   def format_log(message)
     (@log_suffix and not @log_suffix.empty?) ? "#{message} #{@log_suffix}" : message
   end

   private
+
   def json?
     @file_type == 'json'
   end
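The rewritten rescue restructures the old logic and makes the intent explicit: an error matching IGNORE_REDSHIFT_ERROR_REGEXP is a Redshift load error that retrying cannot fix, so it is logged and swallowed (returning false), while any other RedshiftError propagates so Fluentd retries the buffer chunk. A self-contained sketch of the same filter pattern, with a local error class and regexp standing in for the plugin's own:

    class DemoRedshiftError < StandardError; end

    IGNORE_REGEXP = /stl_load_errors/  # stand-in for IGNORE_REDSHIFT_ERROR_REGEXP

    def try_copy
      yield
      true
    rescue DemoRedshiftError => e
      if e.to_s =~ IGNORE_REGEXP
        # permanent load error: report failure, do not re-raise (no retry)
        return false
      end
      raise e  # transient error: let the caller retry the chunk
    end

    puts try_copy { raise DemoRedshiftError, "Check 'stl_load_errors' system table" }  # => false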
@@ -158,11 +161,11 @@ class RedshiftOutput < BufferedOutput

   def create_gz_file_from_structured_data(dst_file, chunk, delimiter)
     # fetch the table definition from redshift
-    redshift_table_columns = fetch_table_columns
+    redshift_table_columns = @redshift_connection.fetch_table_columns(@redshift_tablename, @redshift_schemaname)
     if redshift_table_columns == nil
       raise "failed to fetch the redshift table definition."
     elsif redshift_table_columns.empty?
-      $log.warn format_log("no table on redshift. table_name=#{table_name_with_schema}")
+      $log.warn format_log("no table on redshift. table_name=#{@table_name_with_schema}")
       return nil
     end

@@ -204,27 +207,6 @@ class RedshiftOutput < BufferedOutput
     end
   end

-  def fetch_table_columns
-    begin
-      columns = nil
-      conn = PG.connect(@db_conf)
-      conn.exec(fetch_columns_sql_with_schema) do |result|
-        columns = result.collect{|row| row['column_name']}
-      end
-      columns
-    ensure
-      conn.close rescue nil if conn
-    end
-  end
-
-  def fetch_columns_sql_with_schema
-    @fetch_columns_sql ||= if @redshift_schemaname
-      "select column_name from INFORMATION_SCHEMA.COLUMNS where table_schema = '#{@redshift_schemaname}' and table_name = '#{@redshift_tablename}' order by ordinal_position;"
-    else
-      "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{@redshift_tablename}' order by ordinal_position;"
-    end
-  end
-
   def json_to_hash(json_text)
     return nil if json_text.to_s.empty?

@@ -279,14 +261,108 @@ class RedshiftOutput < BufferedOutput
     s3path
   end

-  def table_name_with_schema
-    @table_name_with_schema ||= if @redshift_schemaname
-      "#{@redshift_schemaname}.#{@redshift_tablename}"
-    else
-      @redshift_tablename
-    end
+  class RedshiftError < StandardError
+    def initialize(msg)
+      case msg
+      when PG::Error
+        @pg_error = msg
+        super(msg.to_s)
+        set_backtrace(msg.backtrace)
+      else
+        super
+      end
+    end
+
+    attr_accessor :pg_error
+  end
+
+  class RedshiftConnection
+    REDSHIFT_CONNECT_TIMEOUT = 10.0 # 10sec
+
+    def initialize(db_conf)
+      @db_conf = db_conf
+      @connection = nil
+    end
+
+    attr_reader :db_conf
+
+    def fetch_table_columns(table_name, schema_name)
+      columns = nil
+      exec(fetch_columns_sql(table_name, schema_name)) do |result|
+        columns = result.collect{|row| row['column_name']}
+      end
+      columns
+    end
+
+    def exec(sql, &block)
+      conn = @connection
+      conn = create_redshift_connection if conn.nil?
+      if block
+        conn.exec(sql) {|result| block.call(result)}
+      else
+        conn.exec(sql)
+      end
+    rescue PG::Error => e
+      raise RedshiftError.new(e)
+    ensure
+      conn.close if conn && @connection.nil?
+    end
+
+    def connect_start
+      @connection = create_redshift_connection
+    end
+
+    def close
+      @connection.close rescue nil if @connection
+      @connection = nil
+    end
+
+    private
+
+    def create_redshift_connection
+      hostaddr = IPSocket.getaddress(db_conf[:host])
+      db_conf[:hostaddr] = hostaddr
+
+      conn = PG::Connection.connect_start(db_conf)
+      raise RedshiftError.new("Unable to create a new connection.") unless conn
+      if conn.status == PG::CONNECTION_BAD
+        raise RedshiftError.new("Connection failed: %s" % [ conn.error_message ])
+      end
+
+      socket = conn.socket_io
+      poll_status = PG::PGRES_POLLING_WRITING
+      until poll_status == PG::PGRES_POLLING_OK || poll_status == PG::PGRES_POLLING_FAILED
+        case poll_status
+        when PG::PGRES_POLLING_READING
+          IO.select([socket], nil, nil, REDSHIFT_CONNECT_TIMEOUT) or
+            raise RedshiftError.new("Asynchronous connection timed out!(READING)")
+        when PG::PGRES_POLLING_WRITING
+          IO.select(nil, [socket], nil, REDSHIFT_CONNECT_TIMEOUT) or
+            raise RedshiftError.new("Asynchronous connection timed out!(WRITING)")
+        end
+        poll_status = conn.connect_poll
+      end
+
+      unless conn.status == PG::CONNECTION_OK
+        raise RedshiftError, ("Connect failed: %s" % [conn.error_message.to_s.lines.uniq.join(" ")])
+      end
+
+      conn
+    rescue => e
+      conn.close rescue nil if conn
+      raise RedshiftError.new(e) if e.kind_of?(PG::Error)
+      raise e
+    end
+
+    def fetch_columns_sql(table_name, schema_name = nil)
+      sql = "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'"
+      sql << " and table_schema = '#{schema_name}'" if schema_name
+      sql << " order by ordinal_position;"
+      sql
+    end
   end
 end


+
 end
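RedshiftConnection centralizes what used to be scattered PG.connect calls. Two details are worth noting: exec opens a one-shot connection and closes it in its ensure clause whenever connect_start has not been called (@connection is nil), and create_redshift_connection uses libpq's non-blocking PG::Connection.connect_start/connect_poll driven by IO.select, so a connection attempt fails after REDSHIFT_CONNECT_TIMEOUT seconds instead of hanging the Fluentd worker. A hedged usage sketch, with placeholder connection parameters:

    # Placeholder parameters; the plugin assembles these from its config.
    conn = Fluent::RedshiftOutput::RedshiftConnection.new(
      host: 'redshift.example', port: 5439,
      dbname: 'mydb', user: 'fluentd', password: 'secret')

    # One-shot mode: each exec connects, runs, and closes in ensure.
    columns = conn.fetch_table_columns('events', 'analytics')

    # Persistent mode: connect_start keeps one connection open across
    # exec calls until close is called.
    conn.connect_start
    conn.exec('select 1;')
    conn.close

Any PG::Error raised inside is wrapped in RedshiftError, which preserves the original exception in #pg_error and reuses its message and backtrace.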
test/plugin/test_out_redshift.rb CHANGED
@@ -12,6 +12,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
     require 'pg'
     require 'csv'
     Fluent::Test.setup
+    PG::Error.module_eval { attr_accessor :result}
   end

   CONFIG_BASE= %[
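The pg gem normally exposes PG::Error#result only as a reader populated by the C extension; the module_eval line above reopens the class so tests can also assign a result to hand-built error objects, mirroring what a real failed COPY would carry. In isolation (requires the pg gem):

    require 'pg'

    PG::Error.module_eval { attr_accessor :result }

    err = PG::Error.new("ERROR: Load into table 'demo' failed.")
    err.result = err.message  # writable now; read-only in stock pg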
@@ -211,54 +212,45 @@ class RedshiftOutputTest < Test::Unit::TestCase
     d_msgpack.run
   end

-  class PGConnectionMock
-    def initialize(options = {})
-      @return_keys = options[:return_keys] || ['key_a', 'key_b', 'key_c', 'key_d', 'key_e', 'key_f', 'key_g', 'key_h']
-      @target_schema = options[:schemaname] || nil
-      @target_table = options[:tablename] || 'test_table'
-    end
+  def setup_redshift_connection_mock(options = {})
+    options ||= {}
+    column_names = options[:column_names] || ['key_a', 'key_b', 'key_c', 'key_d', 'key_e', 'key_f', 'key_g', 'key_h']
+    schema_name = options[:schema_name]
+    table_name = options[:table_name] || 'test_table'
+    exec_sql_proc = options[:exec_sql_proc]

-    def expected_column_list_query
-      if @target_schema
-        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_schema = '#{@target_schema}' and table_name = '#{@target_table}'/
+    column_list_query_regex =
+      if schema_name
+        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}' and table_schema = '#{schema_name}'/
       else
-        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{@target_table}'/
+        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'/
       end
-    end
-
-    def expected_copy_query
-      if @target_schema
-        /\Acopy #{@target_schema}.#{@target_table} from/
+    copy_query_regex =
+      if schema_name
+        /\Acopy #{schema_name}.#{table_name} from/
       else
-        /\Acopy #{@target_table} from/
+        /\Acopy #{table_name} from/
       end
-    end

-    def exec(sql, &block)
-      if block_given?
-        if sql =~ expected_column_list_query
-          yield @return_keys.collect{|key| {'column_name' => key}}
+    flexmock(Fluent::RedshiftOutput::RedshiftConnection).new_instances do |conn|
+      conn.should_receive(:exec).and_return do |sql, block|
+        if exec_sql_proc
+          exec_sql_proc.call(sql, block)
+        elsif block
+          if sql =~ column_list_query_regex
+            block.call column_names.collect{|key| {'column_name' => key}}
+          else
+            block.call []
+          end
         else
-          yield []
-        end
-      else
-        unless sql =~ expected_copy_query
-          error = PG::Error.new("ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details.")
-          error.result = "ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details."
-          raise error
+          unless sql =~ copy_query_regex
+            error = PG::Error.new("ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details.")
+            error.result = "ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details."
+            raise Fluent::RedshiftOutput::RedshiftError.new(error)
+          end
         end
       end
     end
-
-    def close
-    end
-  end
-
-  def setup_pg_mock
-    # create mock of PG
-    def PG.connect(dbinfo)
-      return PGConnectionMock.new
-    end
   end

   def setup_s3_mock(expected_data)
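The new helper relies on flexmock's new_instances hook: every RedshiftConnection the plugin instantiates afterwards gets a stubbed exec, and the optional exec_sql_proc lets each test script the stub's behavior per SQL statement instead of monkey-patching PG.connect. (Note the error message still interpolates @target_table, which is nil in this scope; that string is carried over from the old mock.) The same flexmock idiom on a toy class, with illustrative names:

    require 'test/unit'
    require 'flexmock/test_unit'

    class Greeter
      def greet(name)
        "hello #{name}"
      end
    end

    class GreeterTest < Test::Unit::TestCase
      def test_new_instances_stub
        # Instances created after this point receive the stubbed method;
        # the and_return block is invoked with the call's arguments.
        flexmock(Greeter).new_instances do |mock|
          mock.should_receive(:greet).and_return {|name| "stubbed #{name}" }
        end
        assert_equal 'stubbed world', Greeter.new.greet('world')
      end
    end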
@@ -305,9 +297,10 @@ class RedshiftOutputTest < Test::Unit::TestCase
     flexmock(Tempfile).new_instances.should_receive(:close!).at_least.once
   end

-  def setup_mocks(expected_data)
-    setup_pg_mock
-    setup_s3_mock(expected_data) end
+  def setup_mocks(expected_data, options = {})
+    setup_redshift_connection_mock(options)
+    setup_s3_mock(expected_data)
+  end

   def test_write_with_csv
     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
@@ -421,38 +414,21 @@ class RedshiftOutputTest < Test::Unit::TestCase
   end

   def test_write_redshift_connection_error
-    def PG.connect(dbinfo)
-      return Class.new do
-        def initialize(return_keys=[]); end
-        def exec(sql)
-          raise PG::Error, "redshift connection error"
-        end
-        def close; end
-      end.new
-    end
-    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
-
+    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                exec_sql_proc: Proc.new {|sql, block| raise Fluent::RedshiftOutput::RedshiftError, "redshift connection error" })
     d_csv = create_driver
     emit_csv(d_csv)
-    assert_raise(PG::Error) {
+    assert_raise(Fluent::RedshiftOutput::RedshiftError) {
       d_csv.run
     }
   end

   def test_write_redshift_load_error
-    PG::Error.module_eval { attr_accessor :result}
-    def PG.connect(dbinfo)
-      return Class.new do
-        def initialize(return_keys=[]); end
-        def exec(sql)
-          error = PG::Error.new("ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details.")
-          error.result = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
-          raise error
-        end
-        def close; end
-      end.new
-    end
-    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                exec_sql_proc: Proc.new {|sql, block|
+                  msg = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
+                  raise Fluent::RedshiftOutput::RedshiftError.new(msg)
+                })

     d_csv = create_driver
     emit_csv(d_csv)
@@ -460,36 +436,19 @@ class RedshiftOutputTest < Test::Unit::TestCase
   end

   def test_write_with_json_redshift_connection_error
-    def PG.connect(dbinfo)
-      return Class.new do
-        def initialize(return_keys=[]); end
-        def exec(sql, &block)
-          error = PG::Error.new("redshift connection error")
-          raise error
-        end
-        def close; end
-      end.new
-    end
-    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                exec_sql_proc: Proc.new {|sql, block| raise Fluent::RedshiftOutput::RedshiftError.new("redshift connection error")})

     d_json = create_driver(CONFIG_JSON)
     emit_json(d_json)
-    assert_raise(PG::Error) {
+    assert_raise(Fluent::RedshiftOutput::RedshiftError) {
       d_json.run
     }
   end

   def test_write_with_json_no_table_on_redshift
-    def PG.connect(dbinfo)
-      return Class.new do
-        def initialize(return_keys=[]); end
-        def exec(sql, &block)
-          yield [] if block_given?
-        end
-        def close; end
-      end.new
-    end
-    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                exec_sql_proc: Proc.new {|sql, block| block.call [] if block })

     d_json = create_driver(CONFIG_JSON)
     emit_json(d_json)
@@ -497,15 +456,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
   end

   def test_write_with_json_failed_to_get_columns
-    def PG.connect(dbinfo)
-      return Class.new do
-        def initialize(return_keys=[]); end
-        def exec(sql, &block)
-        end
-        def close; end
-      end.new
-    end
-    setup_s3_mock("")
+    setup_mocks("", exec_sql_proc: Proc.new {|sql, block| nil})

     d_json = create_driver(CONFIG_JSON)
     emit_json(d_json)
@@ -515,10 +466,8 @@ class RedshiftOutputTest < Test::Unit::TestCase
   end

   def test_write_with_json_fetch_column_with_schema
-    def PG.connect(dbinfo)
-      return PGConnectionMock.new(:schemaname => 'test_schema')
-    end
-    setup_s3_mock(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+    setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n],
+                schema_name: 'test_schema')
     d_json = create_driver(CONFIG_JSON_WITH_SCHEMA)
     emit_json(d_json)
     assert_equal true, d_json.run
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-redshift
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 0.0.6
 platform: ruby
 authors:
 - Masashi Miyazaki
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-05-27 00:00:00.000000000 Z
+date: 2015-08-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd