fluent-plugin-redshift 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.2
@@ -4,6 +4,9 @@ module Fluent
4
4
  class RedshiftOutput < BufferedOutput
5
5
  Fluent::Plugin.register_output('redshift', self)
6
6
 
7
+ # ignore load table error. (invalid data format)
8
+ IGNORE_REDSHIFT_ERROR_REGEXP = /^ERROR: Load into table '[^']+' failed\./
9
+
7
10
  def initialize
8
11
  super
9
12
  require 'aws-sdk'
@@ -34,6 +37,8 @@ class RedshiftOutput < BufferedOutput
34
37
  # file format
35
38
  config_param :file_type, :string, :default => nil # json, tsv, csv
36
39
  config_param :delimiter, :string, :default => nil
40
+ # for debug
41
+ config_param :log_suffix, :string, :default => ''
37
42
 
38
43
  def configure(conf)
39
44
  super
@@ -48,8 +53,8 @@ class RedshiftOutput < BufferedOutput
48
53
  password:@redshift_password
49
54
  }
50
55
  @delimiter = determine_delimiter(@file_type) if @delimiter.nil? or @delimiter.empty?
51
- $log.debug "redshift file_type:#{@file_type} delimiter:'#{@delimiter}'"
52
- @copy_sql_template = "copy #{@redshift_tablename} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' REMOVEQUOTES GZIP;"
56
+ $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")
57
+ @copy_sql_template = "copy #{@redshift_tablename} from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP TRUNCATECOLUMNS ESCAPE FILLRECORD ACCEPTANYDATE;"
53
58
  end
54
59
 
55
60
  def start
@@ -69,6 +74,8 @@ class RedshiftOutput < BufferedOutput
69
74
  end
70
75
 
71
76
  def write(chunk)
77
+ $log.debug format_log("start creating gz.")
78
+
72
79
  # create a gz file
73
80
  tmp = Tempfile.new("s3-")
74
81
  tmp = (json?) ? create_gz_file_from_json(tmp, chunk, @delimiter)
@@ -76,8 +83,8 @@ class RedshiftOutput < BufferedOutput
76
83
 
77
84
  # no data -> skip
78
85
  unless tmp
79
- $log.debug "received no valid data. "
80
- return
86
+ $log.debug format_log("received no valid data. ")
87
+ return false # for debug
81
88
  end
82
89
 
83
90
  # create a file path with time format
@@ -89,18 +96,25 @@ class RedshiftOutput < BufferedOutput
89
96
  # copy gz on s3 to redshift
90
97
  s3_uri = "s3://#{@s3_bucket}/#{s3path}"
91
98
  sql = @copy_sql_template % [s3_uri, @aws_sec_key]
92
- $log.debug "start copying. s3_uri=#{s3_uri}"
99
+ $log.debug format_log("start copying. s3_uri=#{s3_uri}")
93
100
  conn = nil
94
101
  begin
95
102
  conn = PG.connect(@db_conf)
96
103
  conn.exec(sql)
97
- $log.info "completed copying to redshift. s3_uri=#{s3_uri}"
104
+ $log.info format_log("completed copying to redshift. s3_uri=#{s3_uri}")
98
105
  rescue PG::Error => e
99
- $log.error "failed to copy data into redshift. sql=#{s3_uri}", :error=>e.to_s
100
- raise e if e.result.nil? # retry if connection errors
106
+ $log.error format_log("failed to copy data into redshift. s3_uri=#{s3_uri}"), :error=>e.to_s
107
+ raise e unless e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
108
+ return false # for debug
101
109
  ensure
102
110
  conn.close rescue nil if conn
103
111
  end
112
+ true # for debug
113
+ end
114
+
115
+ protected
116
+ def format_log(message)
117
+ (@log_suffix and not @log_suffix.empty?) ? "#{message} #{@log_suffix}" : message
104
118
  end
105
119
 
106
120
  private
@@ -125,27 +139,24 @@ class RedshiftOutput < BufferedOutput
125
139
  if redshift_table_columns == nil
126
140
  raise "failed to fetch the redshift table definition."
127
141
  elsif redshift_table_columns.empty?
128
- $log.warn "no table on redshift. table_name=#{@redshift_tablename}"
142
+ $log.warn format_log("no table on redshift. table_name=#{@redshift_tablename}")
129
143
  return nil
130
144
  end
131
145
 
132
146
  # convert json to tsv format text
133
- table_texts = ""
134
- chunk.msgpack_each do |record|
135
- begin
136
- table_texts << json_to_table_text(redshift_table_columns, record[@record_log_tag], delimiter)
137
- rescue => e
138
- $log.error "failed to create table text from json. text=(#{record[@record_log_tag]})", :error=>$!.to_s
139
- $log.error_backtrace
140
- end
141
- end
142
- return nil if table_texts.empty?
143
-
144
- # create gz
145
147
  gzw = nil
146
148
  begin
147
149
  gzw = Zlib::GzipWriter.new(dst_file)
148
- gzw.write(table_texts)
150
+ chunk.msgpack_each do |record|
151
+ begin
152
+ tsv_text = json_to_table_text(redshift_table_columns, record[@record_log_tag], delimiter)
153
+ gzw.write(tsv_text) if tsv_text and not tsv_text.empty?
154
+ rescue => e
155
+ $log.error format_log("failed to create table text from json. text=(#{record[@record_log_tag]})"), :error=>$!.to_s
156
+ $log.error_backtrace
157
+ end
158
+ end
159
+ return nil unless gzw.pos > 0
149
160
  ensure
150
161
  gzw.close rescue nil if gzw
151
162
  end
@@ -185,7 +196,7 @@ class RedshiftOutput < BufferedOutput
185
196
  begin
186
197
  json_obj = JSON.parse(json_text)
187
198
  rescue => e
188
- $log.warn "failed to parse json. ", :error=>e.to_s
199
+ $log.warn format_log("failed to parse json. "), :error=>e.to_s
189
200
  return ""
190
201
  end
191
202
  return "" unless json_obj
@@ -198,19 +209,22 @@ class RedshiftOutput < BufferedOutput
198
209
  val.to_s unless val.nil?
199
210
  end
200
211
  if val_list.all?{|v| v.nil? or v.empty?}
201
- $log.warn "no data match for table columns on redshift. json_text=#{json_text} table_columns=#{redshift_table_columns}"
212
+ $log.warn format_log("no data match for table columns on redshift. json_text=#{json_text} table_columns=#{redshift_table_columns}")
202
213
  return ""
203
214
  end
204
215
 
205
- # generate tsv text
206
- begin
207
- CSV.generate(:col_sep=>delimiter, :quote_char => '"') do |row|
208
- row << val_list # include new line
216
+ generate_line_with_delimiter(val_list, delimiter)
217
+ end
218
+
219
+ def generate_line_with_delimiter(val_list, delimiter)
220
+ val_list = val_list.collect do |val|
221
+ if val.nil? or val.empty?
222
+ ""
223
+ else
224
+ val.gsub(/\\/, "\\\\\\").gsub(/\t/, "\\\t").gsub(/\n/, "\\\n") # escape tab, newline and backslash
209
225
  end
210
- rescue => e
211
- $log.debug "failed to generate csv val_list:#{val_list} delimiter:(#{delimiter})"
212
- raise e
213
226
  end
227
+ val_list.join(delimiter) + "\n"
214
228
  end
215
229
 
216
230
  def create_s3path(bucket, path)
@@ -26,6 +26,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
26
26
  redshift_tablename test_table
27
27
  buffer_type memory
28
28
  utc
29
+ log_suffix id:5 host:localhost
29
30
  ]
30
31
  CONFIG_CSV= %[
31
32
  #{CONFIG_BASE}
@@ -127,6 +128,10 @@ class RedshiftOutputTest < Test::Unit::TestCase
127
128
  assert_equal "pipe", d4.instance.file_type
128
129
  assert_equal "|", d4.instance.delimiter
129
130
  end
131
+ def test_configure_no_log_suffix
132
+ d = create_driver(CONFIG_CSV.gsub(/ *log_suffix *.+$/, ''))
133
+ assert_equal "", d.instance.log_suffix
134
+ end
130
135
 
131
136
  def emit_csv(d)
132
137
  d.emit(RECORD_CSV_A, DEFAULT_TIME)
@@ -231,30 +236,38 @@ class RedshiftOutputTest < Test::Unit::TestCase
231
236
  setup_mocks(%[val_a,val_b,val_c,val_d\nval_e,val_f,val_g,val_h\n])
232
237
  d_csv = create_driver
233
238
  emit_csv(d_csv)
234
- d_csv.run
239
+ assert_equal true, d_csv.run
235
240
  end
236
241
 
237
242
  def test_write_with_json
238
243
  setup_mocks(%[val_a\tval_b\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n])
239
244
  d_json = create_driver(CONFIG_JSON)
240
245
  emit_json(d_json)
241
- d_json.run
246
+ assert_equal true, d_json.run
242
247
  end
243
248
 
244
249
  def test_write_with_json_hash_value
245
- setup_mocks("val_a\t\"{\"\"foo\"\":\"\"var\"\"}\"\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
250
+ setup_mocks("val_a\t{\"foo\":\"var\"}\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
246
251
  d_json = create_driver(CONFIG_JSON)
247
252
  d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : {"foo" : "var"}}]} , DEFAULT_TIME)
248
253
  d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
249
- d_json.run
254
+ assert_equal true, d_json.run
250
255
  end
251
256
 
252
257
  def test_write_with_json_array_value
253
- setup_mocks("val_a\t\"[\"\"foo\"\",\"\"var\"\"]\"\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
258
+ setup_mocks("val_a\t[\"foo\",\"var\"]\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
254
259
  d_json = create_driver(CONFIG_JSON)
255
260
  d_json.emit({"log" => %[{"key_a" : "val_a", "key_b" : ["foo", "var"]}]} , DEFAULT_TIME)
256
261
  d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
257
- d_json.run
262
+ assert_equal true, d_json.run
263
+ end
264
+
265
+ def test_write_with_json_including_tab_newline_quote
266
+ setup_mocks("val_a_with_\\\t_tab_\\\n_newline\tval_b_with_\\\\_quote\t\t\t\t\t\t\n\t\tval_c\tval_d\t\t\t\t\n")
267
+ d_json = create_driver(CONFIG_JSON)
268
+ d_json.emit({"log" => %[{"key_a" : "val_a_with_\\t_tab_\\n_newline", "key_b" : "val_b_with_\\\\_quote"}]} , DEFAULT_TIME)
269
+ d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
270
+ assert_equal true, d_json.run
258
271
  end
259
272
 
260
273
  def test_write_with_json_no_data
@@ -262,7 +275,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
262
275
  d_json = create_driver(CONFIG_JSON)
263
276
  d_json.emit("", DEFAULT_TIME)
264
277
  d_json.emit("", DEFAULT_TIME)
265
- d_json.run
278
+ assert_equal false, d_json.run
266
279
  end
267
280
 
268
281
  def test_write_with_json_invalid_one_line
@@ -270,7 +283,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
270
283
  d_json = create_driver(CONFIG_JSON)
271
284
  d_json.emit({"log" => %[}}]}, DEFAULT_TIME)
272
285
  d_json.emit(RECORD_JSON_B, DEFAULT_TIME)
273
- d_json.run
286
+ assert_equal true, d_json.run
274
287
  end
275
288
 
276
289
  def test_write_with_json_no_available_data
@@ -278,7 +291,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
278
291
  d_json = create_driver(CONFIG_JSON)
279
292
  d_json.emit(RECORD_JSON_A, DEFAULT_TIME)
280
293
  d_json.emit({"log" => %[{"key_o" : "val_o", "key_p" : "val_p"}]}, DEFAULT_TIME)
281
- d_json.run
294
+ assert_equal true, d_json.run
282
295
  end
283
296
 
284
297
  def test_write_redshift_connection_error
@@ -300,14 +313,14 @@ class RedshiftOutputTest < Test::Unit::TestCase
300
313
  }
301
314
  end
302
315
 
303
- def test_write_redshift_logic_error
316
+ def test_write_redshift_load_error
304
317
  PG::Error.module_eval { attr_accessor :result}
305
318
  def PG.connect(dbinfo)
306
319
  return Class.new do
307
320
  def initialize(return_keys=[]); end
308
321
  def exec(sql)
309
- error = PG::Error.new("redshift logic error")
310
- error.result = "logic error"
322
+ error = PG::Error.new("ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details.")
323
+ error.result = "ERROR: Load into table 'apache_log' failed. Check 'stl_load_errors' system table for details."
311
324
  raise error
312
325
  end
313
326
  def close; end
@@ -317,9 +330,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
317
330
 
318
331
  d_csv = create_driver
319
332
  emit_csv(d_csv)
320
- assert_nothing_raised {
321
- d_csv.run
322
- }
333
+ assert_equal false, d_csv.run
323
334
  end
324
335
 
325
336
  def test_write_with_json_redshift_connection_error
@@ -356,9 +367,7 @@ class RedshiftOutputTest < Test::Unit::TestCase
356
367
 
357
368
  d_json = create_driver(CONFIG_JSON)
358
369
  emit_json(d_json)
359
- assert_nothing_raised {
360
- d_json.run
361
- }
370
+ assert_equal false, d_json.run
362
371
  end
363
372
 
364
373
  def test_write_with_json_failed_to_get_columns
@@ -379,17 +388,4 @@ class RedshiftOutputTest < Test::Unit::TestCase
379
388
  }
380
389
  end
381
390
 
382
- def test_write_with_json_failed_to_generate_tsv
383
- flexmock(CSV).should_receive(:generate).with_any_args.
384
- and_return {
385
- raise "failed to generate tsv."
386
- }
387
- setup_s3_mock("")
388
-
389
- d_json = create_driver(CONFIG_JSON)
390
- emit_json(d_json)
391
- assert_nothing_raised {
392
- d_json.run
393
- }
394
- end
395
391
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-redshift
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-07 00:00:00.000000000 Z
12
+ date: 2013-06-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd