fluent-plugin-parser 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +14 -0
- data/fluent-plugin-parser.gemspec +1 -1
- data/lib/fluent/plugin/fixed_parser.rb +13 -3
- data/lib/fluent/plugin/out_parser.rb +26 -2
- data/test/plugin/test_out_parser.rb +148 -0
- metadata +2 -2
data/README.md
CHANGED
@@ -65,6 +65,20 @@ Format 'ltsv'(Labeled-TSV (Tab separated values)) is also supported:
|
|
65
65
|
|
66
66
|
KEY1:VALUE1 [TAB] KEY2:VALUE2 [TAB] ...
|
67
67
|
|
68
|
+
About LTSV, see: http://ltsv.org/
|
69
|
+
|
70
|
+
If you want to suppress 'pattern not match' log, specify 'suppress_parse_error_log true' to configuration.
|
71
|
+
default value is false.
|
72
|
+
|
73
|
+
<match in.hogelog>
|
74
|
+
type parser
|
75
|
+
tag hogelog
|
76
|
+
format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
|
77
|
+
key_name message
|
78
|
+
suppress_parse_error_log true
|
79
|
+
</match>
|
80
|
+
|
81
|
+
|
68
82
|
### DeparserOutput
|
69
83
|
|
70
84
|
To build CSV from field 'store','item','num', as field 'csv', without raw data:
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |gem|
|
3
3
|
gem.name = "fluent-plugin-parser"
|
4
|
-
gem.version = "0.2.1"
|
4
|
+
gem.version = "0.2.2"
|
5
5
|
gem.authors = ["TAGOMORI Satoshi"]
|
6
6
|
gem.email = ["tagomoris@gmail.com"]
|
7
7
|
gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
|
@@ -8,6 +8,7 @@ class FluentExt::TextParser
|
|
8
8
|
include Fluent::Configurable
|
9
9
|
|
10
10
|
config_param :time_format, :string, :default => nil
|
11
|
+
config_param :suppress_parse_error_log, :bool, :default => false
|
11
12
|
|
12
13
|
def initialize(regexp, conf={})
|
13
14
|
super()
|
@@ -20,7 +21,10 @@ class FluentExt::TextParser
|
|
20
21
|
def call(text)
|
21
22
|
m = @regexp.match(text)
|
22
23
|
unless m
|
23
|
-
|
24
|
+
unless @suppress_parse_error_log
|
25
|
+
$log.warn "pattern not match: #{text}"
|
26
|
+
end
|
27
|
+
|
24
28
|
return nil, nil
|
25
29
|
end
|
26
30
|
|
@@ -70,7 +74,10 @@ class FluentExt::TextParser
|
|
70
74
|
record = Yajl.load(text)
|
71
75
|
return parse_time(record)
|
72
76
|
rescue Yajl::ParseError
|
73
|
-
|
77
|
+
unless @suppress_parse_error_log
|
78
|
+
$log.warn "pattern not match(json): #{text.inspect}: #{$!}"
|
79
|
+
end
|
80
|
+
|
74
81
|
return nil, nil
|
75
82
|
end
|
76
83
|
end
|
@@ -122,7 +129,10 @@ class FluentExt::TextParser
|
|
122
129
|
def call(text)
|
123
130
|
m = REGEXP.match(text)
|
124
131
|
unless m
|
125
|
-
|
132
|
+
unless @suppress_parse_error_log
|
133
|
+
$log.warn "pattern not match: #{text.inspect}"
|
134
|
+
end
|
135
|
+
|
126
136
|
return nil, nil
|
127
137
|
end
|
128
138
|
|
@@ -8,6 +8,7 @@ class Fluent::ParserOutput < Fluent::Output
|
|
8
8
|
config_param :add_prefix, :string, :default => nil
|
9
9
|
config_param :key_name, :string
|
10
10
|
config_param :reserve_data, :bool, :default => false
|
11
|
+
config_param :replace_invalid_sequence, :bool, :default => false
|
11
12
|
|
12
13
|
def initialize
|
13
14
|
super
|
@@ -56,7 +57,7 @@ class Fluent::ParserOutput < Fluent::Output
|
|
56
57
|
es.each {|time,record|
|
57
58
|
value = record[@key_name]
|
58
59
|
t,values = if value
|
59
|
-
|
60
|
+
parse(value)
|
60
61
|
else
|
61
62
|
[nil, nil]
|
62
63
|
end
|
@@ -72,7 +73,7 @@ class Fluent::ParserOutput < Fluent::Output
|
|
72
73
|
es.each {|time,record|
|
73
74
|
value = record[@key_name]
|
74
75
|
t,values = if value
|
75
|
-
|
76
|
+
parse(value)
|
76
77
|
else
|
77
78
|
[nil, nil]
|
78
79
|
end
|
@@ -84,4 +85,27 @@ class Fluent::ParserOutput < Fluent::Output
|
|
84
85
|
end
|
85
86
|
chain.next
|
86
87
|
end
|
88
|
+
|
89
|
+
private
|
90
|
+
|
91
|
+
def parse(string)
|
92
|
+
return @parser.parse(string) unless @replace_invalid_sequence
|
93
|
+
|
94
|
+
begin
|
95
|
+
@parser.parse(string)
|
96
|
+
rescue ArgumentError => e
|
97
|
+
unless e.message.index("invalid byte sequence in") == 0
|
98
|
+
raise
|
99
|
+
end
|
100
|
+
replaced_string = replace_invalid_byte(string)
|
101
|
+
@parser.parse(replaced_string)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
def replace_invalid_byte(string)
|
106
|
+
replace_options = { invalid: :replace, undef: :replace, replace: '?' }
|
107
|
+
original_encoding = string.encoding
|
108
|
+
temporal_encoding = (original_encoding == Encoding::UTF_8 ? Encoding::UTF_16BE : Encoding::UTF_8)
|
109
|
+
string.encode(temporal_encoding, original_encoding, replace_options).encode(original_encoding)
|
110
|
+
end
|
87
111
|
end
|
@@ -67,6 +67,22 @@ class ParserOutputTest < Test::Unit::TestCase
|
|
67
67
|
key_name foo
|
68
68
|
]
|
69
69
|
}
|
70
|
+
assert_nothing_raised {
|
71
|
+
d = create_driver %[
|
72
|
+
tag hogelog
|
73
|
+
format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
|
74
|
+
key_name message
|
75
|
+
suppress_parse_error_log true
|
76
|
+
]
|
77
|
+
}
|
78
|
+
assert_nothing_raised {
|
79
|
+
d = create_driver %[
|
80
|
+
tag hogelog
|
81
|
+
format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
|
82
|
+
key_name message
|
83
|
+
suppress_parse_error_log false
|
84
|
+
]
|
85
|
+
}
|
70
86
|
d = create_driver %[
|
71
87
|
tag foo.bar
|
72
88
|
key_name foo
|
@@ -315,4 +331,136 @@ class ParserOutputTest < Test::Unit::TestCase
|
|
315
331
|
#TODO: apache2
|
316
332
|
# REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
|
317
333
|
|
334
|
+
CONFIG_NOT_REPLACE = %[
|
335
|
+
remove_prefix test
|
336
|
+
key_name data
|
337
|
+
format /^(?<message>.*)$/
|
338
|
+
]
|
339
|
+
CONFIG_INVALID_BYTE = CONFIG_NOT_REPLACE + %[
|
340
|
+
replace_invalid_sequence true
|
341
|
+
]
|
342
|
+
def test_emit_invalid_byte
|
343
|
+
invalid_utf8 = "\xff".force_encoding('UTF-8')
|
344
|
+
|
345
|
+
d = create_driver(CONFIG_NOT_REPLACE, 'test.in')
|
346
|
+
assert_raise(ArgumentError) {
|
347
|
+
d.run do
|
348
|
+
d.emit({'data' => invalid_utf8}, Time.now.to_i)
|
349
|
+
end
|
350
|
+
}
|
351
|
+
|
352
|
+
d = create_driver(CONFIG_INVALID_BYTE, 'test.in')
|
353
|
+
assert_nothing_raised {
|
354
|
+
d.run do
|
355
|
+
d.emit({'data' => invalid_utf8}, Time.now.to_i)
|
356
|
+
end
|
357
|
+
}
|
358
|
+
emits = d.emits
|
359
|
+
assert_equal 1, emits.length
|
360
|
+
assert_nil emits[0][2]['data']
|
361
|
+
assert_equal '?'.force_encoding('UTF-8'), emits[0][2]['message']
|
362
|
+
|
363
|
+
d = create_driver(CONFIG_INVALID_BYTE + %[
|
364
|
+
reserve_data yes
|
365
|
+
], 'test.in')
|
366
|
+
assert_nothing_raised {
|
367
|
+
d.run do
|
368
|
+
d.emit({'data' => invalid_utf8}, Time.now.to_i)
|
369
|
+
end
|
370
|
+
}
|
371
|
+
emits = d.emits
|
372
|
+
assert_equal 1, emits.length
|
373
|
+
assert_equal invalid_utf8, emits[0][2]['data']
|
374
|
+
assert_equal '?'.force_encoding('UTF-8'), emits[0][2]['message']
|
375
|
+
|
376
|
+
invalid_ascii = "\xff".force_encoding('US-ASCII')
|
377
|
+
d = create_driver(CONFIG_INVALID_BYTE, 'test.in')
|
378
|
+
assert_nothing_raised {
|
379
|
+
d.run do
|
380
|
+
d.emit({'data' => invalid_ascii}, Time.now.to_i)
|
381
|
+
end
|
382
|
+
}
|
383
|
+
emits = d.emits
|
384
|
+
assert_equal 1, emits.length
|
385
|
+
assert_nil emits[0][2]['data']
|
386
|
+
assert_equal '?'.force_encoding('US-ASCII'), emits[0][2]['message']
|
387
|
+
end
|
388
|
+
|
389
|
+
# suppress_parse_error_log test
|
390
|
+
CONFIG_DISABELED_SUPPRESS_PARSE_ERROR_LOG = %[
|
391
|
+
tag hogelog
|
392
|
+
format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
|
393
|
+
key_name message
|
394
|
+
suppress_parse_error_log false
|
395
|
+
]
|
396
|
+
CONFIG_ENABELED_SUPPRESS_PARSE_ERROR_LOG = %[
|
397
|
+
tag hogelog
|
398
|
+
format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
|
399
|
+
key_name message
|
400
|
+
suppress_parse_error_log true
|
401
|
+
]
|
402
|
+
CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG = %[
|
403
|
+
tag hogelog
|
404
|
+
format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
|
405
|
+
key_name message
|
406
|
+
]
|
407
|
+
|
408
|
+
INVALID_MESSAGE = 'foo bar'
|
409
|
+
VALID_MESSAGE = 'col1=foo col2=bar'
|
410
|
+
|
411
|
+
# if call warn() raise exception
|
412
|
+
class DummyLoggerWarnedException < StandardError; end
|
413
|
+
class DummyLogger
|
414
|
+
def warn(message)
|
415
|
+
raise DummyLoggerWarnedException
|
416
|
+
end
|
417
|
+
end
|
418
|
+
|
419
|
+
def test_suppress_parse_error_log
|
420
|
+
# default(disabled) 'suppress_parse_error_log' is not specify
|
421
|
+
d = create_driver(CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
|
422
|
+
|
423
|
+
saved_logger = $log
|
424
|
+
$log = DummyLogger.new
|
425
|
+
|
426
|
+
assert_raise(DummyLoggerWarnedException) {
|
427
|
+
d.run do
|
428
|
+
d.emit({'message' => INVALID_MESSAGE}, Time.now.to_i)
|
429
|
+
end
|
430
|
+
}
|
431
|
+
|
432
|
+
assert_nothing_raised {
|
433
|
+
d.run do
|
434
|
+
d.emit({'message' => VALID_MESSAGE}, Time.now.to_i)
|
435
|
+
end
|
436
|
+
}
|
437
|
+
|
438
|
+
# disabled 'suppress_parse_error_log'
|
439
|
+
d = create_driver(CONFIG_DISABELED_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
|
440
|
+
|
441
|
+
assert_raise(DummyLoggerWarnedException) {
|
442
|
+
d.run do
|
443
|
+
d.emit({'message' => INVALID_MESSAGE}, Time.now.to_i)
|
444
|
+
end
|
445
|
+
}
|
446
|
+
|
447
|
+
assert_nothing_raised {
|
448
|
+
d.run do
|
449
|
+
d.emit({'message' => VALID_MESSAGE}, Time.now.to_i)
|
450
|
+
end
|
451
|
+
}
|
452
|
+
|
453
|
+
# enabled 'suppress_parse_error_log'
|
454
|
+
d = create_driver(CONFIG_ENABELED_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
|
455
|
+
|
456
|
+
assert_nothing_raised {
|
457
|
+
d.run do
|
458
|
+
d.emit({'message' => INVALID_MESSAGE}, Time.now.to_i)
|
459
|
+
d.emit({'message' => VALID_MESSAGE}, Time.now.to_i)
|
460
|
+
end
|
461
|
+
}
|
462
|
+
|
463
|
+
$log = saved_logger
|
464
|
+
end
|
465
|
+
|
318
466
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-02-
|
12
|
+
date: 2013-02-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|