fluent-plugin-parser 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -65,6 +65,20 @@ Format 'ltsv'(Labeled-TSV (Tab separated values)) is also supported:
65
65
 
66
66
  KEY1:VALUE1 [TAB] KEY2:VALUE2 [TAB] ...
67
67
 
68
+ For details about LTSV, see: http://ltsv.org/
69
+
70
+ To suppress the 'pattern not match' warning log, specify 'suppress_parse_error_log true' in the configuration.
71
+ The default value is false.
72
+
73
+ <match in.hogelog>
74
+ type parser
75
+ tag hogelog
76
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
77
+ key_name message
78
+ suppress_parse_error_log true
79
+ </match>
80
+
81
+
68
82
  ### DeparserOutput
69
83
 
70
84
  To build CSV from field 'store','item','num', as field 'csv', without raw data:
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |gem|
3
3
  gem.name = "fluent-plugin-parser"
4
- gem.version = "0.2.1"
4
+ gem.version = "0.2.2"
5
5
  gem.authors = ["TAGOMORI Satoshi"]
6
6
  gem.email = ["tagomoris@gmail.com"]
7
7
  gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
@@ -8,6 +8,7 @@ class FluentExt::TextParser
8
8
  include Fluent::Configurable
9
9
 
10
10
  config_param :time_format, :string, :default => nil
11
+ config_param :suppress_parse_error_log, :bool, :default => false
11
12
 
12
13
  def initialize(regexp, conf={})
13
14
  super()
@@ -20,7 +21,10 @@ class FluentExt::TextParser
20
21
  def call(text)
21
22
  m = @regexp.match(text)
22
23
  unless m
23
- $log.warn "pattern not match: #{text}"
24
+ unless @suppress_parse_error_log
25
+ $log.warn "pattern not match: #{text}"
26
+ end
27
+
24
28
  return nil, nil
25
29
  end
26
30
 
@@ -70,7 +74,10 @@ class FluentExt::TextParser
70
74
  record = Yajl.load(text)
71
75
  return parse_time(record)
72
76
  rescue Yajl::ParseError
73
- $log.warn "pattern not match(json): #{text.inspect}: #{$!}"
77
+ unless @suppress_parse_error_log
78
+ $log.warn "pattern not match(json): #{text.inspect}: #{$!}"
79
+ end
80
+
74
81
  return nil, nil
75
82
  end
76
83
  end
@@ -122,7 +129,10 @@ class FluentExt::TextParser
122
129
  def call(text)
123
130
  m = REGEXP.match(text)
124
131
  unless m
125
- $log.warn "pattern not match: #{text.inspect}"
132
+ unless @suppress_parse_error_log
133
+ $log.warn "pattern not match: #{text.inspect}"
134
+ end
135
+
126
136
  return nil, nil
127
137
  end
128
138
 
@@ -8,6 +8,7 @@ class Fluent::ParserOutput < Fluent::Output
8
8
  config_param :add_prefix, :string, :default => nil
9
9
  config_param :key_name, :string
10
10
  config_param :reserve_data, :bool, :default => false
11
+ config_param :replace_invalid_sequence, :bool, :default => false
11
12
 
12
13
  def initialize
13
14
  super
@@ -56,7 +57,7 @@ class Fluent::ParserOutput < Fluent::Output
56
57
  es.each {|time,record|
57
58
  value = record[@key_name]
58
59
  t,values = if value
59
- @parser.parse(value)
60
+ parse(value)
60
61
  else
61
62
  [nil, nil]
62
63
  end
@@ -72,7 +73,7 @@ class Fluent::ParserOutput < Fluent::Output
72
73
  es.each {|time,record|
73
74
  value = record[@key_name]
74
75
  t,values = if value
75
- @parser.parse(value)
76
+ parse(value)
76
77
  else
77
78
  [nil, nil]
78
79
  end
@@ -84,4 +85,27 @@ class Fluent::ParserOutput < Fluent::Output
84
85
  end
85
86
  chain.next
86
87
  end
88
+
89
+ private
90
+
91
+ def parse(string)
92
+ return @parser.parse(string) unless @replace_invalid_sequence
93
+
94
+ begin
95
+ @parser.parse(string)
96
+ rescue ArgumentError => e
97
+ unless e.message.index("invalid byte sequence in") == 0
98
+ raise
99
+ end
100
+ replaced_string = replace_invalid_byte(string)
101
+ @parser.parse(replaced_string)
102
+ end
103
+ end
104
+
105
+ def replace_invalid_byte(string)
106
+ replace_options = { invalid: :replace, undef: :replace, replace: '?' }
107
+ original_encoding = string.encoding
108
+ temporal_encoding = (original_encoding == Encoding::UTF_8 ? Encoding::UTF_16BE : Encoding::UTF_8)
109
+ string.encode(temporal_encoding, original_encoding, replace_options).encode(original_encoding)
110
+ end
87
111
  end
@@ -67,6 +67,22 @@ class ParserOutputTest < Test::Unit::TestCase
67
67
  key_name foo
68
68
  ]
69
69
  }
70
+ assert_nothing_raised {
71
+ d = create_driver %[
72
+ tag hogelog
73
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
74
+ key_name message
75
+ suppress_parse_error_log true
76
+ ]
77
+ }
78
+ assert_nothing_raised {
79
+ d = create_driver %[
80
+ tag hogelog
81
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
82
+ key_name message
83
+ suppress_parse_error_log false
84
+ ]
85
+ }
70
86
  d = create_driver %[
71
87
  tag foo.bar
72
88
  key_name foo
@@ -315,4 +331,136 @@ class ParserOutputTest < Test::Unit::TestCase
315
331
  #TODO: apache2
316
332
  # REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
317
333
 
334
+ CONFIG_NOT_REPLACE = %[
335
+ remove_prefix test
336
+ key_name data
337
+ format /^(?<message>.*)$/
338
+ ]
339
+ CONFIG_INVALID_BYTE = CONFIG_NOT_REPLACE + %[
340
+ replace_invalid_sequence true
341
+ ]
342
+ def test_emit_invalid_byte
343
+ invalid_utf8 = "\xff".force_encoding('UTF-8')
344
+
345
+ d = create_driver(CONFIG_NOT_REPLACE, 'test.in')
346
+ assert_raise(ArgumentError) {
347
+ d.run do
348
+ d.emit({'data' => invalid_utf8}, Time.now.to_i)
349
+ end
350
+ }
351
+
352
+ d = create_driver(CONFIG_INVALID_BYTE, 'test.in')
353
+ assert_nothing_raised {
354
+ d.run do
355
+ d.emit({'data' => invalid_utf8}, Time.now.to_i)
356
+ end
357
+ }
358
+ emits = d.emits
359
+ assert_equal 1, emits.length
360
+ assert_nil emits[0][2]['data']
361
+ assert_equal '?'.force_encoding('UTF-8'), emits[0][2]['message']
362
+
363
+ d = create_driver(CONFIG_INVALID_BYTE + %[
364
+ reserve_data yes
365
+ ], 'test.in')
366
+ assert_nothing_raised {
367
+ d.run do
368
+ d.emit({'data' => invalid_utf8}, Time.now.to_i)
369
+ end
370
+ }
371
+ emits = d.emits
372
+ assert_equal 1, emits.length
373
+ assert_equal invalid_utf8, emits[0][2]['data']
374
+ assert_equal '?'.force_encoding('UTF-8'), emits[0][2]['message']
375
+
376
+ invalid_ascii = "\xff".force_encoding('US-ASCII')
377
+ d = create_driver(CONFIG_INVALID_BYTE, 'test.in')
378
+ assert_nothing_raised {
379
+ d.run do
380
+ d.emit({'data' => invalid_ascii}, Time.now.to_i)
381
+ end
382
+ }
383
+ emits = d.emits
384
+ assert_equal 1, emits.length
385
+ assert_nil emits[0][2]['data']
386
+ assert_equal '?'.force_encoding('US-ASCII'), emits[0][2]['message']
387
+ end
388
+
389
+ # suppress_parse_error_log test
390
+ CONFIG_DISABELED_SUPPRESS_PARSE_ERROR_LOG = %[
391
+ tag hogelog
392
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
393
+ key_name message
394
+ suppress_parse_error_log false
395
+ ]
396
+ CONFIG_ENABELED_SUPPRESS_PARSE_ERROR_LOG = %[
397
+ tag hogelog
398
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
399
+ key_name message
400
+ suppress_parse_error_log true
401
+ ]
402
+ CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG = %[
403
+ tag hogelog
404
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
405
+ key_name message
406
+ ]
407
+
408
+ INVALID_MESSAGE = 'foo bar'
409
+ VALID_MESSAGE = 'col1=foo col2=bar'
410
+
411
+ # Dummy logger whose warn() raises an exception, so a test can detect that a warning was logged
412
+ class DummyLoggerWarnedException < StandardError; end
413
+ class DummyLogger
414
+ def warn(message)
415
+ raise DummyLoggerWarnedException
416
+ end
417
+ end
418
+
419
+ def test_suppress_parse_error_log
420
+ # default (disabled): 'suppress_parse_error_log' is not specified
421
+ d = create_driver(CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
422
+
423
+ saved_logger = $log
424
+ $log = DummyLogger.new
425
+
426
+ assert_raise(DummyLoggerWarnedException) {
427
+ d.run do
428
+ d.emit({'message' => INVALID_MESSAGE}, Time.now.to_i)
429
+ end
430
+ }
431
+
432
+ assert_nothing_raised {
433
+ d.run do
434
+ d.emit({'message' => VALID_MESSAGE}, Time.now.to_i)
435
+ end
436
+ }
437
+
438
+ # disabled 'suppress_parse_error_log'
439
+ d = create_driver(CONFIG_DISABELED_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
440
+
441
+ assert_raise(DummyLoggerWarnedException) {
442
+ d.run do
443
+ d.emit({'message' => INVALID_MESSAGE}, Time.now.to_i)
444
+ end
445
+ }
446
+
447
+ assert_nothing_raised {
448
+ d.run do
449
+ d.emit({'message' => VALID_MESSAGE}, Time.now.to_i)
450
+ end
451
+ }
452
+
453
+ # enabled 'suppress_parse_error_log'
454
+ d = create_driver(CONFIG_ENABELED_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
455
+
456
+ assert_nothing_raised {
457
+ d.run do
458
+ d.emit({'message' => INVALID_MESSAGE}, Time.now.to_i)
459
+ d.emit({'message' => VALID_MESSAGE}, Time.now.to_i)
460
+ end
461
+ }
462
+
463
+ $log = saved_logger
464
+ end
465
+
318
466
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-07 00:00:00.000000000 Z
12
+ date: 2013-02-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake