fluent-plugin-redshift 0.0.5 → 0.0.6
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/fluent/plugin/out_redshift.rb +114 -38
- data/test/plugin/test_out_redshift.rb +50 -101
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ae21e38c3e70d5c36c0c52bcf3e38183756e4534
+  data.tar.gz: a2ebf5bc56f51b9c5f4dbd55fcf07499d9bc9bd8
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 064a44a6ed9086b1aef44eded321e984a9ccb8afc530e46b03ad99a77b2a2b384c24900ceea220f92c96871c1db18c9503910aff50ccc2b36d042e3d6077d80d
+  data.tar.gz: c85eb3832be2f660f8974b535e95430a51c2a13556a97d3f417eec816a05eb78c015ba079080847a46f48b3f6350cbddd3a06bae12464bcdcba4a26776e28390
data/VERSION
CHANGED
@@ -1 +1 @@
-0.0.5
+0.0.6
data/lib/fluent/plugin/out_redshift.rb
CHANGED
@@ -59,7 +59,8 @@ class RedshiftOutput < BufferedOutput
     }
     @delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
     $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
-    @copy_sql_template = "copy #{table_name_with_schema} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
+    @table_name_with_schema = [@redshift_schemaname, @redshift_tablename].compact.join('.')
+    @copy_sql_template = "copy #{@table_name_with_schema} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP ESCAPE #{@redshift_copy_base_options} #{@redshift_copy_options};"
   end
 
   def start
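The removed line built the COPY statement through a table_name_with_schema helper; 0.0.6 precomputes the joined name once in configure. A minimal sketch of how the template expands at copy time; the table, bucket path, and credentials below are placeholder assumptions, not values from the gem:

    # Hypothetical values for illustration only.
    table_name_with_schema = ['public', 'access_log'].compact.join('.')  # => "public.access_log"
    # With no schema configured, compact drops the nil: [nil, 'access_log'] => "access_log"
    template = "copy #{table_name_with_schema} from '%s' CREDENTIALS " \
               "'aws_access_key_id=AKIAEXAMPLE;aws_secret_access_key=%s' delimiter ',' GZIP ESCAPE"
    sql = template % ["s3://my-bucket/logs/20150805_00.gz", "<secret>"]
    # => "copy public.access_log from 's3://my-bucket/logs/20150805_00.gz' CREDENTIALS ..."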
@@ -72,6 +73,7 @@ class RedshiftOutput < BufferedOutput
     options[:s3_endpoint] = @s3_endpoint if @s3_endpoint
     @s3 = AWS::S3.new(options)
     @bucket = @s3.buckets[@s3_bucket]
+    @redshift_connection = RedshiftConnection.new(@db_conf)
   end
 
   def format(tag, time, record)
@@ -118,25 +120,26 @@ class RedshiftOutput < BufferedOutput
     $log.debug format_log("start copying. s3_uri=#{s3_uri}")
 
     begin
-
-      conn.exec(sql)
+      @redshift_connection.exec(sql)
       $log.info format_log("completed copying to redshift. s3_uri=#{s3_uri}")
-    rescue
-
-
-
-
-
+    rescue RedshiftError => e
+      if e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
+        $log.error format_log("failed to copy data into redshift due to load error. s3_uri=#{s3_uri}"), :error=>e.to_s
+        return false # for debug
+      end
+      raise e
     end
     true # for debug
   end
 
   protected
+
   def format_log(message)
     (@log_suffix and not @log_suffix.empty?) ? "#{message} #{@log_suffix}" : message
   end
 
   private
+
   def json?
     @file_type == 'json'
   end
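The rewritten rescue narrows what the plugin swallows: only errors whose message matches IGNORE_REDSHIFT_ERROR_REGEXP (Redshift load errors caused by bad records, which a Fluentd retry cannot fix) are logged and dropped; everything else, including connection failures, still propagates and triggers a retry. A self-contained sketch of that pattern, with an assumed regexp value; the real constant is defined elsewhere in out_redshift.rb:

    class RedshiftError < StandardError; end

    # Assumed pattern for illustration; the plugin defines its own constant.
    IGNORE_REDSHIFT_ERROR_REGEXP = /^ERROR: +Load into table '[^']+' failed\./

    def copy_once
      raise RedshiftError, "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
    rescue RedshiftError => e
      # Bad data: log and drop instead of retrying the chunk forever.
      return false if e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
      raise e
    end

    copy_once  # => false; a connection error would have been re-raised instead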
@@ -158,11 +161,11 @@ class RedshiftOutput < BufferedOutput
 
   def create_gz_file_from_structured_data(dst_file, chunk, delimiter)
     # fetch the table definition from redshift
-    redshift_table_columns = fetch_table_columns
+    redshift_table_columns = @redshift_connection.fetch_table_columns(@redshift_tablename, @redshift_schemaname)
     if redshift_table_columns == nil
       raise "failed to fetch the redshift table definition."
     elsif redshift_table_columns.empty?
-      $log.warn format_log("no table on redshift. table_name=#{table_name_with_schema}")
+      $log.warn format_log("no table on redshift. table_name=#{@table_name_with_schema}")
       return nil
     end
 
@@ -204,27 +207,6 @@ class RedshiftOutput < BufferedOutput
     end
   end
 
-  def fetch_table_columns
-    begin
-      columns = nil
-      conn = PG.connect(@db_conf)
-      conn.exec(fetch_columns_sql_with_schema) do |result|
-        columns = result.collect{|row| row['column_name']}
-      end
-      columns
-    ensure
-      conn.close rescue nil if conn
-    end
-  end
-
-  def fetch_columns_sql_with_schema
-    @fetch_columns_sql ||= if @redshift_schemaname
-      "select column_name from INFORMATION_SCHEMA.COLUMNS where table_schema = '#{@redshift_schemaname}' and table_name = '#{@redshift_tablename}' order by ordinal_position;"
-    else
-      "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{@redshift_tablename}' order by ordinal_position;"
-    end
-  end
-
   def json_to_hash(json_text)
     return nil if json_text.to_s.empty?
 
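The removed helpers reappear below as RedshiftConnection#fetch_table_columns and #fetch_columns_sql. Copying the new SQL builder out of the diff makes the two query shapes easy to verify:

    def fetch_columns_sql(table_name, schema_name = nil)
      sql = "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'"
      sql << " and table_schema = '#{schema_name}'" if schema_name
      sql << " order by ordinal_position;"
      sql
    end

    fetch_columns_sql('apache_log')
    # => "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = 'apache_log' order by ordinal_position;"
    fetch_columns_sql('apache_log', 'test_schema')
    # => "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = 'apache_log' and table_schema = 'test_schema' order by ordinal_position;"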
@@ -279,14 +261,108 @@ class RedshiftOutput < BufferedOutput
     s3path
   end
 
-
-
-
-
-
-
+  class RedshiftError < StandardError
+    def initialize(msg)
+      case msg
+      when PG::Error
+        @pg_error = msg
+        super(msg.to_s)
+        set_backtrace(msg.backtrace)
+      else
+        super
+      end
+    end
+
+    attr_accessor :pg_error
+  end
+
+  class RedshiftConnection
+    REDSHIFT_CONNECT_TIMEOUT = 10.0 # 10sec
+
+    def initialize(db_conf)
+      @db_conf = db_conf
+      @connection = nil
+    end
+
+    attr_reader :db_conf
+
+    def fetch_table_columns(table_name, schema_name)
+      columns = nil
+      exec(fetch_columns_sql(table_name, schema_name)) do |result|
+        columns = result.collect{|row| row['column_name']}
+      end
+      columns
+    end
+
+    def exec(sql, &block)
+      conn = @connection
+      conn = create_redshift_connection if conn.nil?
+      if block
+        conn.exec(sql) {|result| block.call(result)}
+      else
+        conn.exec(sql)
+      end
+    rescue PG::Error => e
+      raise RedshiftError.new(e)
+    ensure
+      conn.close if conn && @connection.nil?
+    end
+
+    def connect_start
+      @connection = create_redshift_connection
+    end
+
+    def close
+      @connection.close rescue nil if @connection
+      @connection = nil
+    end
+
+    private
+
+    def create_redshift_connection
+      hostaddr = IPSocket.getaddress(db_conf[:host])
+      db_conf[:hostaddr] = hostaddr
+
+      conn = PG::Connection.connect_start(db_conf)
+      raise RedshiftError.new("Unable to create a new connection.") unless conn
+      if conn.status == PG::CONNECTION_BAD
+        raise RedshiftError.new("Connection failed: %s" % [ conn.error_message ])
+      end
+
+      socket = conn.socket_io
+      poll_status = PG::PGRES_POLLING_WRITING
+      until poll_status == PG::PGRES_POLLING_OK || poll_status == PG::PGRES_POLLING_FAILED
+        case poll_status
+        when PG::PGRES_POLLING_READING
+          IO.select([socket], nil, nil, REDSHIFT_CONNECT_TIMEOUT) or
+            raise RedshiftError.new("Asynchronous connection timed out!(READING)")
+        when PG::PGRES_POLLING_WRITING
+          IO.select(nil, [socket], nil, REDSHIFT_CONNECT_TIMEOUT) or
+            raise RedshiftError.new("Asynchronous connection timed out!(WRITING)")
+        end
+        poll_status = conn.connect_poll
+      end
+
+      unless conn.status == PG::CONNECTION_OK
+        raise RedshiftError, ("Connect failed: %s" % [conn.error_message.to_s.lines.uniq.join(" ")])
+      end
+
+      conn
+    rescue => e
+      conn.close rescue nil if conn
+      raise RedshiftError.new(e) if e.kind_of?(PG::Error)
+      raise e
+    end
+
+    def fetch_columns_sql(table_name, schema_name = nil)
+      sql = "select column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'"
+      sql << " and table_schema = '#{schema_name}'" if schema_name
+      sql << " order by ordinal_position;"
+      sql
+    end
   end
 end
 
 
+
 end
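Taken together, the new classes give the plugin one connection object with two usage modes. A hedged usage sketch; the connection parameters are hypothetical, and db_conf uses the same pg/libpq keys the plugin already passes to PG:

    require 'fluent/plugin/out_redshift'

    # Hypothetical cluster settings for illustration.
    conn = Fluent::RedshiftOutput::RedshiftConnection.new(
      host: 'example.cluster.redshift.amazonaws.com', port: 5439,
      dbname: 'dev', user: 'fluent', password: '<secret>')

    # One-shot mode: exec opens a fresh connection and closes it in ensure.
    columns = conn.fetch_table_columns('apache_log', nil)

    # Pinned mode: connect_start keeps one connection open until close.
    conn.connect_start
    conn.exec("select 1;")
    conn.close

Note the nonblocking connect path: the host is resolved up front with IPSocket.getaddress, then PG::Connection.connect_start plus a connect_poll/IO.select loop caps each wait at REDSHIFT_CONNECT_TIMEOUT (10 s), so an unreachable cluster raises instead of hanging a Fluentd flush thread.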
data/test/plugin/test_out_redshift.rb
CHANGED
@@ -12,6 +12,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
     require 'pg'
     require 'csv'
     Fluent::Test.setup
+    PG::Error.module_eval { attr_accessor :result}
   end
 
   CONFIG_BASE= %[
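The pg gem's PG::Error normally exposes result only as a reader backed by libpq; module_eval swaps in an accessor so the tests below can plant a canned message on a hand-built error. A minimal illustration, with the message text taken from the tests themselves:

    require 'pg'

    PG::Error.module_eval { attr_accessor :result }

    err = PG::Error.new("ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details.")
    err.result = err.message  # writable now; a real libpq error populates this itself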
@@ -211,54 +212,45 @@ class RedshiftOutputTest < Test::Unit::TestCase
     d_msgpack.run
   end
 
-
-
-
-
-
-
+  def setup_redshift_connection_mock(options = {})
+    options ||= {}
+    column_names = options[:column_names] || ['key_a', 'key_b', 'key_c', 'key_d', 'key_e', 'key_f', 'key_g', 'key_h']
+    schema_name = options[:schema_name]
+    table_name = options[:table_name] || 'test_table'
+    exec_sql_proc = options[:exec_sql_proc]
 
-
-      if @target_schema
-        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_schema = '#{@target_schema}' and table_name = '#{@target_table}'/
+    column_list_query_regex =
+      if schema_name
+        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}' and table_schema = '#{schema_name}'/
       else
-        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{@target_table}'/
+        /\Aselect column_name from INFORMATION_SCHEMA.COLUMNS where table_name = '#{table_name}'/
       end
-
-
-
-      if @target_schema
-        /\Acopy #{@target_schema}.#{@target_table} from/
+    copy_query_regex =
+      if schema_name
+        /\Acopy #{schema_name}.#{table_name} from/
       else
-        /\Acopy #{@target_table} from/
+        /\Acopy #{table_name} from/
       end
-    end
 
-
-
-      if
-
+    flexmock(Fluent::RedshiftOutput::RedshiftConnection).new_instances do |conn|
+      conn.should_receive(:exec).and_return do |sql, block|
+        if exec_sql_proc
+          exec_sql_proc.call(sql, block)
+        elsif block
+          if sql =~ column_list_query_regex
+            block.call column_names.collect{|key| {'column_name' => key}}
+          else
+            block.call []
+          end
         else
-
-
-
-
-
-          error.result = "ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details."
-          raise error
+          unless sql =~ copy_query_regex
+            error = PG::Error.new("ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details.")
+            error.result = "ERROR: Load into table '#{@target_table}' failed. Check 'stl_load_errors' system table for details."
+            raise Fluent::RedshiftOutput::RedshiftError.new(error)
+          end
         end
       end
     end
-
-    def close
-    end
-  end
-
-  def setup_pg_mock
-    # create mock of PG
-    def PG.connect(dbinfo)
-      return PGConnectionMock.new
-    end
   end
 
   def setup_s3_mock(expected_data)
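setup_redshift_connection_mock leans on flexmock's new_instances, which stubs every instance the named class subsequently creates, so the RedshiftConnection the plugin builds internally is intercepted without any dependency injection. A stripped-down sketch of the same pattern as it would appear inside a test; the class and values here are illustrative, not from the suite:

    require 'flexmock/test_unit'

    class Widget
      def ping; 'real'; end
    end

    class WidgetTest < Test::Unit::TestCase
      def test_new_instances_stub
        flexmock(Widget).new_instances do |mock|
          # Applies to every Widget.new from here on in this test.
          mock.should_receive(:ping).and_return('stubbed')
        end
        assert_equal 'stubbed', Widget.new.ping
      end
    end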
@@ -305,9 +297,10 @@ class RedshiftOutputTest < Test::Unit::TestCase
     flexmock(Tempfile).new_instances.should_receive(:close!).at_least.once
   end
 
-  def setup_mocks(expected_data)
-
-    setup_s3_mock(expected_data)
+  def setup_mocks(expected_data, options = {})
+    setup_redshift_connection_mock(options)
+    setup_s3_mock(expected_data)
+  end
 
   def test_write_with_csv
     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
@@ -421,38 +414,21 @@ class RedshiftOutputTest < Test::Unit::TestCase
   end
 
   def test_write_redshift_connection_error
-
-
-      def initialize(return_keys=[]); end
-      def exec(sql)
-        raise PG::Error, "redshift connection error"
-      end
-      def close; end
-    end.new
-    end
-    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
-
+    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                exec_sql_proc: Proc.new {|sql, block| raise Fluent::RedshiftOutput::RedshiftError, "redshift connection error" })
     d_csv = create_driver
     emit_csv(d_csv)
-    assert_raise(PG::Error) {
+    assert_raise(Fluent::RedshiftOutput::RedshiftError) {
      d_csv.run
     }
   end
 
   def test_write_redshift_load_error
-
-
-
-
-
-      error = PG::Error.new("ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details.")
-      error.result = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
-      raise error
-    end
-    def close; end
-    end.new
-    end
-    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                exec_sql_proc: Proc.new {|sql, block|
+                  msg = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
+                  raise Fluent::RedshiftOutput::RedshiftError.new(msg)
+                })
 
     d_csv = create_driver
     emit_csv(d_csv)
@@ -460,36 +436,19 @@ class RedshiftOutputTest < Test::Unit::TestCase
   end
 
   def test_write_with_json_redshift_connection_error
-
-
-      def initialize(return_keys=[]); end
-      def exec(sql, &block)
-        error = PG::Error.new("redshift connection error")
-        raise error
-      end
-      def close; end
-    end.new
-    end
-    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                exec_sql_proc: Proc.new {|sql, block| raise Fluent::RedshiftOutput::RedshiftError.new("redshift connection error")})
 
     d_json = create_driver(CONFIG_JSON)
     emit_json(d_json)
-    assert_raise(PG::Error) {
+    assert_raise(Fluent::RedshiftOutput::RedshiftError) {
       d_json.run
     }
   end
 
   def test_write_with_json_no_table_on_redshift
-
-
-      def initialize(return_keys=[]); end
-      def exec(sql, &block)
-        yield [] if block_given?
-      end
-      def close; end
-    end.new
-    end
-    setup_s3_mock(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
+    setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n],
+                exec_sql_proc: Proc.new {|sql, block| block.call [] if block })
 
     d_json = create_driver(CONFIG_JSON)
     emit_json(d_json)
@@ -497,15 +456,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
   end
 
   def test_write_with_json_failed_to_get_columns
-
-    return Class.new do
-      def initialize(return_keys=[]); end
-      def exec(sql, &block)
-      end
-      def close; end
-    end.new
-  end
-    setup_s3_mock("")
+    setup_mocks("", exec_sql_proc: Proc.new {|sql, block| nil})
 
     d_json = create_driver(CONFIG_JSON)
     emit_json(d_json)
@@ -515,10 +466,8 @@ class RedshiftOutputTest < Test::Unit::TestCase
   end
 
   def test_write_with_json_fetch_column_with_schema
-
-
-    end
-    setup_s3_mock(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
+    setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n],
+                schema_name: 'test_schema')
     d_json = create_driver(CONFIG_JSON_WITH_SCHEMA)
     emit_json(d_json)
     assert_equal true, d_json.run
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-redshift
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 0.0.6
 platform: ruby
 authors:
 - Masashi Miyazaki
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-05
+date: 2015-08-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd