fluent-plugin-redshift 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/fluent/plugin/out_redshift.rb +45 -31
- data/test/plugin/test_out_redshift.rb +27 -31
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.0.2
|
@@ -4,6 +4,9 @@ module Fluent
|
|
4
4
|
class RedshiftOutput < BufferedOutput
|
5
5
|
Fluent::Plugin.register_output('redshift', self)
|
6
6
|
|
7
|
+
# ignore load table error. (invalid data format)
|
8
|
+
IGNORE_REDSHIFT_ERROR_REGEXP = /^ERROR: Load into table '[^']+' failed\./
|
9
|
+
|
7
10
|
def initialize
|
8
11
|
super
|
9
12
|
require 'aws-sdk'
|
@@ -34,6 +37,8 @@ class RedshiftOutput < BufferedOutput
|
|
34
37
|
# file format
|
35
38
|
config_param :file_type, :string, :default => nil # json, tsv, csv
|
36
39
|
config_param :delimiter, :string, :default => nil
|
40
|
+
# for debug
|
41
|
+
config_param :log_suffix, :string, :default => ''
|
37
42
|
|
38
43
|
def configure(conf)
|
39
44
|
super
|
@@ -48,8 +53,8 @@ class RedshiftOutput < BufferedOutput
|
|
48
53
|
password:@redshift_password
|
49
54
|
}
|
50
55
|
@delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
|
51
|
-
$log.debug "redshift file_type:#{@file_type} delimiter:'#{@delimiter}'"
|
52
|
-
@copy_sql_template = "copy #{@redshift_tablename} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}'
|
56
|
+
$log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
|
57
|
+
@copy_sql_template = "copy #{@redshift_tablename} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP TRUNCATECOLUMNS ESCAPE FILLRECORD ACCEPTANYDATE;"
|
53
58
|
end
|
54
59
|
|
55
60
|
def start
|
@@ -69,6 +74,8 @@ class RedshiftOutput < BufferedOutput
|
|
69
74
|
end
|
70
75
|
|
71
76
|
def write(chunk)
|
77
|
+
$log.debug format_log("start creating gz.")
|
78
|
+
|
72
79
|
# create a gz file
|
73
80
|
tmp = Tempfile.new("s3-")
|
74
81
|
tmp = (json?) ? create_gz_file_from_json(tmp, chunk, @delimiter)
|
@@ -76,8 +83,8 @@ class RedshiftOutput < BufferedOutput
|
|
76
83
|
|
77
84
|
# no data -> skip
|
78
85
|
unless tmp
|
79
|
-
$log.debug "received no valid data. "
|
80
|
-
return
|
86
|
+
$log.debug format_log("received no valid data. ")
|
87
|
+
return false # for debug
|
81
88
|
end
|
82
89
|
|
83
90
|
# create a file path with time format
|
@@ -89,18 +96,25 @@ class RedshiftOutput < BufferedOutput
|
|
89
96
|
# copy gz on s3 to redshift
|
90
97
|
s3_uri = "s3://#{@s3_bucket}/#{s3path}"
|
91
98
|
sql = @copy_sql_template % [s3_uri, @aws_sec_key]
|
92
|
-
$log.debug
|
99
|
+
$log.debug format_log("start copying. s3_uri=#{s3_uri}")
|
93
100
|
conn = nil
|
94
101
|
begin
|
95
102
|
conn = PG.connect(@db_conf)
|
96
103
|
conn.exec(sql)
|
97
|
-
$log.info "completed copying to redshift. s3_uri=#{s3_uri}"
|
104
|
+
$log.info format_log("completed copying to redshift. s3_uri=#{s3_uri}")
|
98
105
|
rescue PG::Error => e
|
99
|
-
$log.error "failed to copy data into redshift.
|
100
|
-
raise e
|
106
|
+
$log.error format_log("failed to copy data into redshift. s3_uri=#{s3_uri}"), :error=>e.to_s
|
107
|
+
raise e unless e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
|
108
|
+
return false # for debug
|
101
109
|
ensure
|
102
110
|
conn.close rescue nil if conn
|
103
111
|
end
|
112
|
+
true # for debug
|
113
|
+
end
|
114
|
+
|
115
|
+
protected
|
116
|
+
def format_log(message)
|
117
|
+
(@log_suffix and not @log_suffix.empty?) ? "#{message} #{@log_suffix}" : message
|
104
118
|
end
|
105
119
|
|
106
120
|
private
|
@@ -125,27 +139,24 @@ class RedshiftOutput < BufferedOutput
|
|
125
139
|
if redshift_table_columns == nil
|
126
140
|
raise "failed to fetch the redshift table definition."
|
127
141
|
elsif redshift_table_columns.empty?
|
128
|
-
$log.warn "no table on redshift. table_name=#{@redshift_tablename}"
|
142
|
+
$log.warn format_log("no table on redshift. table_name=#{@redshift_tablename}")
|
129
143
|
return nil
|
130
144
|
end
|
131
145
|
|
132
146
|
# convert json to tsv format text
|
133
|
-
table_texts = ""
|
134
|
-
chunk.msgpack_each do |record|
|
135
|
-
begin
|
136
|
-
table_texts << json_to_table_text(redshift_table_columns, record[@record_log_tag], delimiter)
|
137
|
-
rescue => e
|
138
|
-
$log.error "failed to create table text from json. text=(#{record[@record_log_tag]})", :error=>$!.to_s
|
139
|
-
$log.error_backtrace
|
140
|
-
end
|
141
|
-
end
|
142
|
-
return nil if table_texts.empty?
|
143
|
-
|
144
|
-
# create gz
|
145
147
|
gzw = nil
|
146
148
|
begin
|
147
149
|
gzw = Zlib::GzipWriter.new(dst_file)
|
148
|
-
|
150
|
+
chunk.msgpack_each do |record|
|
151
|
+
begin
|
152
|
+
tsv_text = json_to_table_text(redshift_table_columns, record[@record_log_tag], delimiter)
|
153
|
+
gzw.write(tsv_text) if tsv_text and not tsv_text.empty?
|
154
|
+
rescue => e
|
155
|
+
$log.error format_log("failed to create table text from json. text=(#{record[@record_log_tag]})"), :error=>$!.to_s
|
156
|
+
$log.error_backtrace
|
157
|
+
end
|
158
|
+
end
|
159
|
+
return nil unless gzw.pos > 0
|
149
160
|
ensure
|
150
161
|
gzw.close rescue nil if gzw
|
151
162
|
end
|
@@ -185,7 +196,7 @@ class RedshiftOutput < BufferedOutput
|
|
185
196
|
begin
|
186
197
|
json_obj = JSON.parse(json_text)
|
187
198
|
rescue => e
|
188
|
-
$log.warn "failed to parse json. ", :error=>e.to_s
|
199
|
+
$log.warn format_log("failed to parse json. "), :error=>e.to_s
|
189
200
|
return ""
|
190
201
|
end
|
191
202
|
return "" unless json_obj
|
@@ -198,19 +209,22 @@ class RedshiftOutput < BufferedOutput
|
|
198
209
|
val.to_s unless val.nil?
|
199
210
|
end
|
200
211
|
if val_list.all?{|v| v.nil? or v.empty?}
|
201
|
-
$log.warn "no data match for table columns on redshift. json_text=#{json_text} table_columns=#{redshift_table_columns}"
|
212
|
+
$log.warn format_log("no data match for table columns on redshift. json_text=#{json_text} table_columns=#{redshift_table_columns}")
|
202
213
|
return ""
|
203
214
|
end
|
204
215
|
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
216
|
+
generate_line_with_delimiter(val_list, delimiter)
|
217
|
+
end
|
218
|
+
|
219
|
+
def generate_line_with_delimiter(val_list, delimiter)
|
220
|
+
val_list = val_list.collect do |val|
|
221
|
+
if val.nil? or val.empty?
|
222
|
+
""
|
223
|
+
else
|
224
|
+
val.gsub(/\\/, "\\\\\\").gsub(/\t/, "\\\t").gsub(/\n/, "\\\n") # escape tab, newline and backslash
|
209
225
|
end
|
210
|
-
rescue => e
|
211
|
-
$log.debug "failed to generate csv val_list:#{val_list} delimiter:(#{delimiter})"
|
212
|
-
raise e
|
213
226
|
end
|
227
|
+
val_list.join(delimiter) + "\n"
|
214
228
|
end
|
215
229
|
|
216
230
|
def create_s3path(bucket, path)
|
@@ -26,6 +26,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
26
26
|
redshift_tablename test_table
|
27
27
|
buffer_type memory
|
28
28
|
utc
|
29
|
+
log_suffix id:5 host:localhost
|
29
30
|
]
|
30
31
|
CONFIG_CSV= %[
|
31
32
|
#{CONFIG_BASE}
|
@@ -127,6 +128,10 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
127
128
|
assert_equal "pipe", d4.instance.file_type
|
128
129
|
assert_equal "|", d4.instance.delimiter
|
129
130
|
end
|
131
|
+
def test_configure_no_log_suffix
|
132
|
+
d = create_driver(CONFIG_CSV.gsub(/ *log_suffix *.+$/, ''))
|
133
|
+
assert_equal "", d.instance.log_suffix
|
134
|
+
end
|
130
135
|
|
131
136
|
def emit_csv(d)
|
132
137
|
d.emit(RECORD_CSV_A, DEFAULT_TIME)
|
@@ -231,30 +236,38 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
231
236
|
setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
|
232
237
|
d_csv = create_driver
|
233
238
|
emit_csv(d_csv)
|
234
|
-
d_csv.run
|
239
|
+
assert_equal true, d_csv.run
|
235
240
|
end
|
236
241
|
|
237
242
|
def test_write_with_json
|
238
243
|
setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
|
239
244
|
d_json = create_driver(CONFIG_JSON)
|
240
245
|
emit_json(d_json)
|
241
|
-
d_json.run
|
246
|
+
assert_equal true, d_json.run
|
242
247
|
end
|
243
248
|
|
244
249
|
def test_write_with_json_hash_value
|
245
|
-
setup_mocks("val_a\t
|
250
|
+
setup_mocks("val_a\t{\"foo\":\"var\"}\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
|
246
251
|
d_json = create_driver(CONFIG_JSON)
|
247
252
|
d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : {"foo" : "var"}}]} , DEFAULT_TIME)
|
248
253
|
d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
|
249
|
-
d_json.run
|
254
|
+
assert_equal true, d_json.run
|
250
255
|
end
|
251
256
|
|
252
257
|
def test_write_with_json_array_value
|
253
|
-
setup_mocks("val_a\t
|
258
|
+
setup_mocks("val_a\t[\"foo\",\"var\"]\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
|
254
259
|
d_json = create_driver(CONFIG_JSON)
|
255
260
|
d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ["foo", "var"]}]} , DEFAULT_TIME)
|
256
261
|
d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
|
257
|
-
d_json.run
|
262
|
+
assert_equal true, d_json.run
|
263
|
+
end
|
264
|
+
|
265
|
+
def test_write_with_json_including_tab_newline_quote
|
266
|
+
setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
|
267
|
+
d_json = create_driver(CONFIG_JSON)
|
268
|
+
d_json.emit({"log" => %[{"key_a" : "val_a_with_\\t_tab_\\n_newline", "key_b" : "val_b_with_\\\\_quote"}]} , DEFAULT_TIME)
|
269
|
+
d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
|
270
|
+
assert_equal true, d_json.run
|
258
271
|
end
|
259
272
|
|
260
273
|
def test_write_with_json_no_data
|
@@ -262,7 +275,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
262
275
|
d_json = create_driver(CONFIG_JSON)
|
263
276
|
d_json.emit("", DEFAULT_TIME)
|
264
277
|
d_json.emit("", DEFAULT_TIME)
|
265
|
-
d_json.run
|
278
|
+
assert_equal false, d_json.run
|
266
279
|
end
|
267
280
|
|
268
281
|
def test_write_with_json_invalid_one_line
|
@@ -270,7 +283,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
270
283
|
d_json = create_driver(CONFIG_JSON)
|
271
284
|
d_json.emit({"log" => %[}}]}, DEFAULT_TIME)
|
272
285
|
d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
|
273
|
-
d_json.run
|
286
|
+
assert_equal true, d_json.run
|
274
287
|
end
|
275
288
|
|
276
289
|
def test_write_with_json_no_available_data
|
@@ -278,7 +291,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
278
291
|
d_json = create_driver(CONFIG_JSON)
|
279
292
|
d_json.emit(RECORD_JSON_A, DEFAULT_TIME)
|
280
293
|
d_json.emit({"log" => %[{"key_o" : "val_o", "key_p" : "val_p"}]}, DEFAULT_TIME)
|
281
|
-
d_json.run
|
294
|
+
assert_equal true, d_json.run
|
282
295
|
end
|
283
296
|
|
284
297
|
def test_write_redshift_connection_error
|
@@ -300,14 +313,14 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
300
313
|
}
|
301
314
|
end
|
302
315
|
|
303
|
-
def
|
316
|
+
def test_write_redshift_load_error
|
304
317
|
PG::Error.module_eval { attr_accessor :result}
|
305
318
|
def PG.connect(dbinfo)
|
306
319
|
return Class.new do
|
307
320
|
def initialize(return_keys=[]); end
|
308
321
|
def exec(sql)
|
309
|
-
error = PG::Error.new("
|
310
|
-
error.result = "
|
322
|
+
error = PG::Error.new("ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details.")
|
323
|
+
error.result = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
|
311
324
|
raise error
|
312
325
|
end
|
313
326
|
def close; end
|
@@ -317,9 +330,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
317
330
|
|
318
331
|
d_csv = create_driver
|
319
332
|
emit_csv(d_csv)
|
320
|
-
|
321
|
-
d_csv.run
|
322
|
-
}
|
333
|
+
assert_equal false, d_csv.run
|
323
334
|
end
|
324
335
|
|
325
336
|
def test_write_with_json_redshift_connection_error
|
@@ -356,9 +367,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
356
367
|
|
357
368
|
d_json = create_driver(CONFIG_JSON)
|
358
369
|
emit_json(d_json)
|
359
|
-
|
360
|
-
d_json.run
|
361
|
-
}
|
370
|
+
assert_equal false, d_json.run
|
362
371
|
end
|
363
372
|
|
364
373
|
def test_write_with_json_failed_to_get_columns
|
@@ -379,17 +388,4 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
379
388
|
}
|
380
389
|
end
|
381
390
|
|
382
|
-
def test_write_with_json_failed_to_generate_tsv
|
383
|
-
flexmock(CSV).should_receive(:generate).with_any_args.
|
384
|
-
and_return {
|
385
|
-
raise "failed to generate tsv."
|
386
|
-
}
|
387
|
-
setup_s3_mock("")
|
388
|
-
|
389
|
-
d_json = create_driver(CONFIG_JSON)
|
390
|
-
emit_json(d_json)
|
391
|
-
assert_nothing_raised {
|
392
|
-
d_json.run
|
393
|
-
}
|
394
|
-
end
|
395
391
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-redshift
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-06-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: fluentd
|