fluent-plugin-redshift 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/fluent/plugin/out_redshift.rb +45 -31
- data/test/plugin/test_out_redshift.rb +27 -31
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
-0.0.1
+0.0.2
data/lib/fluent/plugin/out_redshift.rb
CHANGED
@@ -4,6 +4,9 @@ module Fluent
 class RedshiftOutput < BufferedOutput
   Fluent::Plugin.register_output('redshift', self)
 
+  # ignore load table error. (invalid data format)
+  IGNORE_REDSHIFT_ERROR_REGEXP = /^ERROR: Load into table '[^']+' failed\./
+
   def initialize
     super
     require 'aws-sdk'
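The new constant targets the message Redshift emits when a COPY rejects malformed rows; as the write hunk below shows, matching errors are logged and swallowed so a chunk of bad data is not retried forever. A minimal standalone check, reusing the load-error message that appears in the test file (the table name apache_log is just the tests' example):

    # A match means the error is swallowed; a miss means it is re-raised.
    regexp = /^ERROR: Load into table '[^']+' failed\./
    msg = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
    puts(msg =~ regexp ? "ignored (no fluentd retry)" : "re-raised (fluentd retries the chunk)")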
@@ -34,6 +37,8 @@ class RedshiftOutput < BufferedOutput
   # file format
   config_param :file_type, :string, :default => nil # json, tsv, csv
   config_param :delimiter, :string, :default => nil
+  # for debug
+  config_param :log_suffix, :string, :default => ''
 
   def configure(conf)
     super
@@ -48,8 +53,8 @@ class RedshiftOutput < BufferedOutput
       password:@redshift_password
     }
     @delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
-    $log.debug "redshift file_type:#{@file_type} delimiter:'#{@delimiter}'"
-    @copy_sql_template = "copy #{@redshift_tablename} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}'
+    $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
+    @copy_sql_template = "copy #{@redshift_tablename} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP TRUNCATECOLUMNS ESCAPE FILLRECORD ACCEPTANYDATE;"
   end
 
   def start
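The rebuilt template adds Redshift COPY options that make loads more tolerant: GZIP (input objects are gzip files), TRUNCATECOLUMNS (over-length varchar values are truncated rather than rejected), ESCAPE (a backslash escapes the following character, pairing with the new escaping code further down), FILLRECORD (missing trailing columns are loaded as empty/NULL), and ACCEPTANYDATE (unparseable dates load as NULL instead of failing the row). A sketch of the rendered statement, with placeholder table, bucket and credentials:

    # Only the shape of the statement matters here; the %s slots are filled
    # with the S3 URI and the secret key at copy time, as in write() below.
    template = "copy my_table from '%s' CREDENTIALS 'aws_access_key_id=MYKEY;aws_secret_access_key=%s' delimiter '\t' GZIP TRUNCATECOLUMNS ESCAPE FILLRECORD ACCEPTANYDATE;"
    puts template % ["s3://my-bucket/fluent/2013/06/06_0.gz", "MYSECRET"]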
@@ -69,6 +74,8 @@ class RedshiftOutput < BufferedOutput
   end
 
   def write(chunk)
+    $log.debug format_log("start creating gz.")
+
     # create a gz file
     tmp = Tempfile.new("s3-")
     tmp = (json?) ? create_gz_file_from_json(tmp, chunk, @delimiter)
@@ -76,8 +83,8 @@ class RedshiftOutput < BufferedOutput
 
     # no data -> skip
     unless tmp
-      $log.debug "received no valid data. "
-      return
+      $log.debug format_log("received no valid data. ")
+      return false # for debug
     end
 
     # create a file path with time format
@@ -89,18 +96,25 @@ class RedshiftOutput < BufferedOutput
     # copy gz on s3 to redshift
     s3_uri = "s3://#{@s3_bucket}/#{s3path}"
     sql = @copy_sql_template % [s3_uri, @aws_sec_key]
-    $log.debug
+    $log.debug format_log("start copying. s3_uri=#{s3_uri}")
     conn = nil
     begin
       conn = PG.connect(@db_conf)
       conn.exec(sql)
-      $log.info "completed copying to redshift. s3_uri=#{s3_uri}"
+      $log.info format_log("completed copying to redshift. s3_uri=#{s3_uri}")
     rescue PG::Error => e
-      $log.error "failed to copy data into redshift.
-      raise e
+      $log.error format_log("failed to copy data into redshift. s3_uri=#{s3_uri}"), :error=>e.to_s
+      raise e unless e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
+      return false # for debug
     ensure
       conn.close rescue nil if conn
     end
+    true # for debug
+  end
+
+  protected
+  def format_log(message)
+    (@log_suffix and not @log_suffix.empty?) ? "#{message} #{@log_suffix}" : message
   end
 
   private
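Every log call now routes through the new format_log helper, which appends the optional log_suffix so lines from multiple plugin instances can be told apart in the fluentd log. A minimal standalone sketch of its behaviour, using the suffix from the test config below:

    # Mirrors format_log outside the plugin class.
    log_suffix = "id:5 host:localhost"
    message = "completed copying to redshift."
    formatted = (log_suffix and not log_suffix.empty?) ? "#{message} #{log_suffix}" : message
    puts formatted  # => completed copying to redshift. id:5 host:localhost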
@@ -125,27 +139,24 @@ class RedshiftOutput < BufferedOutput
     if redshift_table_columns == nil
       raise "failed to fetch the redshift table definition."
     elsif redshift_table_columns.empty?
-      $log.warn "no table on redshift. table_name=#{@redshift_tablename}"
+      $log.warn format_log("no table on redshift. table_name=#{@redshift_tablename}")
       return nil
     end
 
     # convert json to tsv format text
-    table_texts = ""
-    chunk.msgpack_each do |record|
-      begin
-        table_texts << json_to_table_text(redshift_table_columns, record[@record_log_tag], delimiter)
-      rescue => e
-        $log.error "failed to create table text from json. text=(#{record[@record_log_tag]})", :error=>$!.to_s
-        $log.error_backtrace
-      end
-    end
-    return nil if table_texts.empty?
-
-    # create gz
     gzw = nil
     begin
       gzw = Zlib::GzipWriter.new(dst_file)
-
+      chunk.msgpack_each do |record|
+        begin
+          tsv_text = json_to_table_text(redshift_table_columns, record[@record_log_tag], delimiter)
+          gzw.write(tsv_text) if tsv_text and not tsv_text.empty?
+        rescue => e
+          $log.error format_log("failed to create table text from json. text=(#{record[@record_log_tag]})"), :error=>$!.to_s
+          $log.error_backtrace
+        end
+      end
+      return nil unless gzw.pos > 0
    ensure
       gzw.close rescue nil if gzw
     end
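Rows are now streamed straight into the GzipWriter instead of being buffered in a string first, and Zlib::GzipWriter#pos, the count of uncompressed bytes written so far, replaces the old emptiness check. A small illustration, assuming only the documented behaviour of pos:

    require 'zlib'
    require 'stringio'

    gzw = Zlib::GzipWriter.new(StringIO.new)
    gzw.write("")               # skipped or invalid records contribute nothing
    puts gzw.pos                # => 0, i.e. the "received no valid data" case
    gzw.write("val_a\tval_b\n")
    puts gzw.pos                # => 12 (uncompressed bytes, not compressed size)
    gzw.close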
@@ -185,7 +196,7 @@ class RedshiftOutput < BufferedOutput
     begin
       json_obj = JSON.parse(json_text)
     rescue => e
-      $log.warn "failed to parse json. ", :error=>e.to_s
+      $log.warn format_log("failed to parse json. "), :error=>e.to_s
       return ""
     end
     return "" unless json_obj
@@ -198,19 +209,22 @@ class RedshiftOutput < BufferedOutput
       val.to_s unless val.nil?
     end
     if val_list.all?{|v| v.nil? or v.empty?}
-      $log.warn "no data match for table columns on redshift. json_text=#{json_text} table_columns=#{redshift_table_columns}"
+      $log.warn format_log("no data match for table columns on redshift. json_text=#{json_text} table_columns=#{redshift_table_columns}")
       return ""
     end
 
-
-
-
-
+    generate_line_with_delimiter(val_list, delimiter)
+  end
+
+  def generate_line_with_delimiter(val_list, delimiter)
+    val_list = val_list.collect do |val|
+      if val.nil? or val.empty?
+        ""
+      else
+        val.gsub(/\\/, "\\\\\\").gsub(/\t/, "\\\t").gsub(/\n/, "\\\n") # escape tab, newline and backslash
       end
-    rescue => e
-      $log.debug "failed to generate csv val_list:#{val_list} delimiter:(#{delimiter})"
-      raise e
     end
+    val_list.join(delimiter) + "\n"
   end
 
   def create_s3path(bucket, path)
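The gsub chain is easy to misread because gsub interprets backslashes in the replacement string a second time: "\\\\\\" is three backslash characters after Ruby string parsing, which gsub collapses to two, so each backslash in a value is doubled while each tab and newline gains a leading backslash. Together with the ESCAPE option on COPY above, this keeps delimiter characters inside values from splitting a row. A standalone sketch of the same chain:

    # Same chain as generate_line_with_delimiter.
    def escape(val)
      val.gsub(/\\/, "\\\\\\").gsub(/\t/, "\\\t").gsub(/\n/, "\\\n")
    end

    p escape("a\\b")  # => "a\\\\b" (one backslash becomes two)
    p escape("a\tb")  # => "a\\\tb" (tab gains a leading backslash)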
data/test/plugin/test_out_redshift.rb
CHANGED
@@ -26,6 +26,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
     redshift_tablename test_table
     buffer_type memory
     utc
+    log_suffix id:5 host:localhost
   ]
   CONFIG_CSV= %[
     #{CONFIG_BASE}
@@ -127,6 +128,10 @@ class RedshiftOutputTest < Test::Unit::TestCase
     assert_equal "pipe", d4.instance.file_type
     assert_equal "|", d4.instance.delimiter
   end
+  def test_configure_no_log_suffix
+    d = create_driver(CONFIG_CSV.gsub(/ *log_suffix *.+$/, ''))
+    assert_equal "", d.instance.log_suffix
+  end
 
   def emit_csv(d)
     d.emit(RECORD_CSV_A, DEFAULT_TIME)
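The new test derives a suffix-free config by stripping the log_suffix line out of CONFIG_CSV with a gsub, then checks that the default empty string takes effect. What that gsub does to the config text:

    config = "  utc\n  log_suffix id:5 host:localhost\n"
    p config.gsub(/ *log_suffix *.+$/, '')  # => "  utc\n\n" (the option line is blanked)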
@@ -231,30 +236,38 @@ class RedshiftOutputTest < Test::Unit::TestCase
     setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
     d_csv = create_driver
     emit_csv(d_csv)
-    d_csv.run
+    assert_equal true, d_csv.run
   end
 
   def test_write_with_json
     setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
     d_json = create_driver(CONFIG_JSON)
     emit_json(d_json)
-    d_json.run
+    assert_equal true, d_json.run
   end
 
   def test_write_with_json_hash_value
-    setup_mocks("val_a\t
+    setup_mocks("val_a\t{\"foo\":\"var\"}\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
     d_json = create_driver(CONFIG_JSON)
     d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : {"foo" : "var"}}]} , DEFAULT_TIME)
     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
-    d_json.run
+    assert_equal true, d_json.run
   end
 
   def test_write_with_json_array_value
-    setup_mocks("val_a\t
+    setup_mocks("val_a\t[\"foo\",\"var\"]\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
     d_json = create_driver(CONFIG_JSON)
     d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ["foo", "var"]}]} , DEFAULT_TIME)
     d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
-    d_json.run
+    assert_equal true, d_json.run
+  end
+
+  def test_write_with_json_including_tab_newline_quote
+    setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
+    d_json = create_driver(CONFIG_JSON)
+    d_json.emit({"log" => %[{"key_a" : "val_a_with_\\t_tab_\\n_newline", "key_b" : "val_b_with_\\\\_quote"}]} , DEFAULT_TIME)
+    d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
+    assert_equal true, d_json.run
   end
 
   def test_write_with_json_no_data
@@ -262,7 +275,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
     d_json = create_driver(CONFIG_JSON)
     d_json.emit("", DEFAULT_TIME)
     d_json.emit("", DEFAULT_TIME)
-    d_json.run
+    assert_equal false, d_json.run
   end
 
   def test_write_with_json_invalid_one_line
def test_write_with_json_invalid_one_line
|
@@ -270,7 +283,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
270
283
|
d_json = create_driver(CONFIG_JSON)
|
271
284
|
d_json.emit({"log" => %[}}]}, DEFAULT_TIME)
|
272
285
|
d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
|
273
|
-
d_json.run
|
286
|
+
assert_equal true, d_json.run
|
274
287
|
end
|
275
288
|
|
276
289
|
def test_write_with_json_no_available_data
|
@@ -278,7 +291,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
278
291
|
d_json = create_driver(CONFIG_JSON)
|
279
292
|
d_json.emit(RECORD_JSON_A, DEFAULT_TIME)
|
280
293
|
d_json.emit({"log" => %[{"key_o" : "val_o", "key_p" : "val_p"}]}, DEFAULT_TIME)
|
281
|
-
d_json.run
|
294
|
+
assert_equal true, d_json.run
|
282
295
|
end
|
283
296
|
|
284
297
|
def test_write_redshift_connection_error
|
@@ -300,14 +313,14 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
300
313
|
}
|
301
314
|
end
|
302
315
|
|
303
|
-
def
|
316
|
+
def test_write_redshift_load_error
|
304
317
|
PG::Error.module_eval { attr_accessor :result}
|
305
318
|
def PG.connect(dbinfo)
|
306
319
|
return Class.new do
|
307
320
|
def initialize(return_keys=[]); end
|
308
321
|
def exec(sql)
|
309
|
-
error = PG::Error.new("
|
310
|
-
error.result = "
|
322
|
+
error = PG::Error.new("ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details.")
|
323
|
+
error.result = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
|
311
324
|
raise error
|
312
325
|
end
|
313
326
|
def close; end
|
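The message this mocked exec raises is the same one IGNORE_REDSHIFT_ERROR_REGEXP is written against, so the reworked write swallows the error instead of re-raising; the assertion in the next hunk accordingly expects d_csv.run to return false rather than raise.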
@@ -317,9 +330,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
 
     d_csv = create_driver
     emit_csv(d_csv)
-
-      d_csv.run
-    }
+    assert_equal false, d_csv.run
   end
 
   def test_write_with_json_redshift_connection_error
def test_write_with_json_redshift_connection_error
|
@@ -356,9 +367,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
356
367
|
|
357
368
|
d_json = create_driver(CONFIG_JSON)
|
358
369
|
emit_json(d_json)
|
359
|
-
|
360
|
-
d_json.run
|
361
|
-
}
|
370
|
+
assert_equal false, d_json.run
|
362
371
|
end
|
363
372
|
|
364
373
|
def test_write_with_json_failed_to_get_columns
|
@@ -379,17 +388,4 @@ class RedshiftOutputTest < Test::Unit::TestCase
|
|
379
388
|
}
|
380
389
|
end
|
381
390
|
|
382
|
-
def test_write_with_json_failed_to_generate_tsv
|
383
|
-
flexmock(CSV).should_receive(:generate).with_any_args.
|
384
|
-
and_return {
|
385
|
-
raise "failed to generate tsv."
|
386
|
-
}
|
387
|
-
setup_s3_mock("")
|
388
|
-
|
389
|
-
d_json = create_driver(CONFIG_JSON)
|
390
|
-
emit_json(d_json)
|
391
|
-
assert_nothing_raised {
|
392
|
-
d_json.run
|
393
|
-
}
|
394
|
-
end
|
395
391
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-redshift
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
 prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-
+date: 2013-06-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fluentd