fluent-plugin-parser 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -65,6 +65,20 @@ Format 'ltsv'(Labeled-TSV (Tab separated values)) is also supported:
65
65
 
66
66
  KEY1:VALUE1 [TAB] KEY2:VALUE2 [TAB] ...
67
67
 
68
+ About LTSV, see: http://ltsv.org/
69
+
70
+ To suppress the 'pattern not match' log message, specify 'suppress_parse_error_log true' in the configuration.
71
+ The default value is false.
72
+
73
+ <match in.hogelog>
74
+ type parser
75
+ tag hogelog
76
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
77
+ key_name message
78
+ suppress_parse_error_log true
79
+ </match>
80
+
81
+
68
82
  ### DeparserOutput
69
83
 
70
84
  To build CSV from field 'store','item','num', as field 'csv', without raw data:
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |gem|
3
3
  gem.name = "fluent-plugin-parser"
4
- gem.version = "0.2.1"
4
+ gem.version = "0.2.2"
5
5
  gem.authors = ["TAGOMORI Satoshi"]
6
6
  gem.email = ["tagomoris@gmail.com"]
7
7
  gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
@@ -8,6 +8,7 @@ class FluentExt::TextParser
8
8
  include Fluent::Configurable
9
9
 
10
10
  config_param :time_format, :string, :default => nil
11
+ config_param :suppress_parse_error_log, :bool, :default => false
11
12
 
12
13
  def initialize(regexp, conf={})
13
14
  super()
@@ -20,7 +21,10 @@ class FluentExt::TextParser
20
21
  def call(text)
21
22
  m = @regexp.match(text)
22
23
  unless m
23
- $log.warn "pattern not match: #{text}"
24
+ unless @suppress_parse_error_log
25
+ $log.warn "pattern not match: #{text}"
26
+ end
27
+
24
28
  return nil, nil
25
29
  end
26
30
 
@@ -70,7 +74,10 @@ class FluentExt::TextParser
70
74
  record = Yajl.load(text)
71
75
  return parse_time(record)
72
76
  rescue Yajl::ParseError
73
- $log.warn "pattern not match(json): #{text.inspect}: #{$!}"
77
+ unless @suppress_parse_error_log
78
+ $log.warn "pattern not match(json): #{text.inspect}: #{$!}"
79
+ end
80
+
74
81
  return nil, nil
75
82
  end
76
83
  end
@@ -122,7 +129,10 @@ class FluentExt::TextParser
122
129
  def call(text)
123
130
  m = REGEXP.match(text)
124
131
  unless m
125
- $log.warn "pattern not match: #{text.inspect}"
132
+ unless @suppress_parse_error_log
133
+ $log.warn "pattern not match: #{text.inspect}"
134
+ end
135
+
126
136
  return nil, nil
127
137
  end
128
138
 
@@ -8,6 +8,7 @@ class Fluent::ParserOutput < Fluent::Output
8
8
  config_param :add_prefix, :string, :default => nil
9
9
  config_param :key_name, :string
10
10
  config_param :reserve_data, :bool, :default => false
11
+ config_param :replace_invalid_sequence, :bool, :default => false
11
12
 
12
13
  def initialize
13
14
  super
@@ -56,7 +57,7 @@ class Fluent::ParserOutput < Fluent::Output
56
57
  es.each {|time,record|
57
58
  value = record[@key_name]
58
59
  t,values = if value
59
- @parser.parse(value)
60
+ parse(value)
60
61
  else
61
62
  [nil, nil]
62
63
  end
@@ -72,7 +73,7 @@ class Fluent::ParserOutput < Fluent::Output
72
73
  es.each {|time,record|
73
74
  value = record[@key_name]
74
75
  t,values = if value
75
- @parser.parse(value)
76
+ parse(value)
76
77
  else
77
78
  [nil, nil]
78
79
  end
@@ -84,4 +85,27 @@ class Fluent::ParserOutput < Fluent::Output
84
85
  end
85
86
  chain.next
86
87
  end
88
+
89
+ private
90
+
91
+ def parse(string)
92
+ return @parser.parse(string) unless @replace_invalid_sequence
93
+
94
+ begin
95
+ @parser.parse(string)
96
+ rescue ArgumentError => e
97
+ unless e.message.index("invalid byte sequence in") == 0
98
+ raise
99
+ end
100
+ replaced_string = replace_invalid_byte(string)
101
+ @parser.parse(replaced_string)
102
+ end
103
+ end
104
+
105
+ def replace_invalid_byte(string)
106
+ replace_options = { invalid: :replace, undef: :replace, replace: '?' }
107
+ original_encoding = string.encoding
108
+ temporal_encoding = (original_encoding == Encoding::UTF_8 ? Encoding::UTF_16BE : Encoding::UTF_8)
109
+ string.encode(temporal_encoding, original_encoding, replace_options).encode(original_encoding)
110
+ end
87
111
  end
@@ -67,6 +67,22 @@ class ParserOutputTest < Test::Unit::TestCase
67
67
  key_name foo
68
68
  ]
69
69
  }
70
+ assert_nothing_raised {
71
+ d = create_driver %[
72
+ tag hogelog
73
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
74
+ key_name message
75
+ suppress_parse_error_log true
76
+ ]
77
+ }
78
+ assert_nothing_raised {
79
+ d = create_driver %[
80
+ tag hogelog
81
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
82
+ key_name message
83
+ suppress_parse_error_log false
84
+ ]
85
+ }
70
86
  d = create_driver %[
71
87
  tag foo.bar
72
88
  key_name foo
@@ -315,4 +331,136 @@ class ParserOutputTest < Test::Unit::TestCase
315
331
  #TODO: apache2
316
332
  # REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
317
333
 
334
+ CONFIG_NOT_REPLACE = %[
335
+ remove_prefix test
336
+ key_name data
337
+ format /^(?<message>.*)$/
338
+ ]
339
+ CONFIG_INVALID_BYTE = CONFIG_NOT_REPLACE + %[
340
+ replace_invalid_sequence true
341
+ ]
342
+ def test_emit_invalid_byte
343
+ invalid_utf8 = "\xff".force_encoding('UTF-8')
344
+
345
+ d = create_driver(CONFIG_NOT_REPLACE, 'test.in')
346
+ assert_raise(ArgumentError) {
347
+ d.run do
348
+ d.emit({'data' => invalid_utf8}, Time.now.to_i)
349
+ end
350
+ }
351
+
352
+ d = create_driver(CONFIG_INVALID_BYTE, 'test.in')
353
+ assert_nothing_raised {
354
+ d.run do
355
+ d.emit({'data' => invalid_utf8}, Time.now.to_i)
356
+ end
357
+ }
358
+ emits = d.emits
359
+ assert_equal 1, emits.length
360
+ assert_nil emits[0][2]['data']
361
+ assert_equal '?'.force_encoding('UTF-8'), emits[0][2]['message']
362
+
363
+ d = create_driver(CONFIG_INVALID_BYTE + %[
364
+ reserve_data yes
365
+ ], 'test.in')
366
+ assert_nothing_raised {
367
+ d.run do
368
+ d.emit({'data' => invalid_utf8}, Time.now.to_i)
369
+ end
370
+ }
371
+ emits = d.emits
372
+ assert_equal 1, emits.length
373
+ assert_equal invalid_utf8, emits[0][2]['data']
374
+ assert_equal '?'.force_encoding('UTF-8'), emits[0][2]['message']
375
+
376
+ invalid_ascii = "\xff".force_encoding('US-ASCII')
377
+ d = create_driver(CONFIG_INVALID_BYTE, 'test.in')
378
+ assert_nothing_raised {
379
+ d.run do
380
+ d.emit({'data' => invalid_ascii}, Time.now.to_i)
381
+ end
382
+ }
383
+ emits = d.emits
384
+ assert_equal 1, emits.length
385
+ assert_nil emits[0][2]['data']
386
+ assert_equal '?'.force_encoding('US-ASCII'), emits[0][2]['message']
387
+ end
388
+
389
+ # suppress_parse_error_log test
390
+ CONFIG_DISABELED_SUPPRESS_PARSE_ERROR_LOG = %[
391
+ tag hogelog
392
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
393
+ key_name message
394
+ suppress_parse_error_log false
395
+ ]
396
+ CONFIG_ENABELED_SUPPRESS_PARSE_ERROR_LOG = %[
397
+ tag hogelog
398
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
399
+ key_name message
400
+ suppress_parse_error_log true
401
+ ]
402
+ CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG = %[
403
+ tag hogelog
404
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
405
+ key_name message
406
+ ]
407
+
408
+ INVALID_MESSAGE = 'foo bar'
409
+ VALID_MESSAGE = 'col1=foo col2=bar'
410
+
411
+ # Logger stub: calling warn() raises an exception, so a test can detect that a warning was logged
412
+ class DummyLoggerWarnedException < StandardError; end
413
+ class DummyLogger
414
+ def warn(message)
415
+ raise DummyLoggerWarnedException
416
+ end
417
+ end
418
+
419
+ def test_suppress_parse_error_log
420
+ # default (disabled): 'suppress_parse_error_log' is not specified
421
+ d = create_driver(CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
422
+
423
+ saved_logger = $log
424
+ $log = DummyLogger.new
425
+
426
+ assert_raise(DummyLoggerWarnedException) {
427
+ d.run do
428
+ d.emit({'message' => INVALID_MESSAGE}, Time.now.to_i)
429
+ end
430
+ }
431
+
432
+ assert_nothing_raised {
433
+ d.run do
434
+ d.emit({'message' => VALID_MESSAGE}, Time.now.to_i)
435
+ end
436
+ }
437
+
438
+ # disabled 'suppress_parse_error_log'
439
+ d = create_driver(CONFIG_DISABELED_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
440
+
441
+ assert_raise(DummyLoggerWarnedException) {
442
+ d.run do
443
+ d.emit({'message' => INVALID_MESSAGE}, Time.now.to_i)
444
+ end
445
+ }
446
+
447
+ assert_nothing_raised {
448
+ d.run do
449
+ d.emit({'message' => VALID_MESSAGE}, Time.now.to_i)
450
+ end
451
+ }
452
+
453
+ # enabled 'suppress_parse_error_log'
454
+ d = create_driver(CONFIG_ENABELED_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
455
+
456
+ assert_nothing_raised {
457
+ d.run do
458
+ d.emit({'message' => INVALID_MESSAGE}, Time.now.to_i)
459
+ d.emit({'message' => VALID_MESSAGE}, Time.now.to_i)
460
+ end
461
+ }
462
+
463
+ $log = saved_logger
464
+ end
465
+
318
466
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-07 00:00:00.000000000 Z
12
+ date: 2013-02-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake