fluent-plugin-parser 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +14 -0
- data/fluent-plugin-parser.gemspec +1 -1
- data/lib/fluent/plugin/fixed_parser.rb +13 -3
- data/lib/fluent/plugin/out_parser.rb +26 -2
- data/test/plugin/test_out_parser.rb +148 -0
- metadata +2 -2
data/README.md
CHANGED
@@ -65,6 +65,20 @@ Format 'ltsv'(Labeled-TSV (Tab separated values)) is also supported:
|
|
65
65
|
|
66
66
|
KEY1:VALUE1 [TAB] KEY2:VALUE2 [TAB] ...
|
67
67
|
|
68
|
+
About LTSV, see: http://ltsv.org/
|
69
|
+
|
70
|
+
If you want to suppress the 'pattern not match' log messages, specify 'suppress_parse_error_log true' in the configuration.
|
71
|
+
The default value is false.
|
72
|
+
|
73
|
+
<match in.hogelog>
|
74
|
+
type parser
|
75
|
+
tag hogelog
|
76
|
+
format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
|
77
|
+
key_name message
|
78
|
+
suppress_parse_error_log true
|
79
|
+
</match>
|
80
|
+
|
81
|
+
|
68
82
|
### DeparserOutput
|
69
83
|
|
70
84
|
To build CSV from field 'store','item','num', as field 'csv', without raw data:
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |gem|
|
3
3
|
gem.name = "fluent-plugin-parser"
|
4
|
-
gem.version = "0.2.1"
|
4
|
+
gem.version = "0.2.2"
|
5
5
|
gem.authors = ["TAGOMORI Satoshi"]
|
6
6
|
gem.email = ["tagomoris@gmail.com"]
|
7
7
|
gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
|
@@ -8,6 +8,7 @@ class FluentExt::TextParser
|
|
8
8
|
include Fluent::Configurable
|
9
9
|
|
10
10
|
config_param :time_format, :string, :default => nil
|
11
|
+
config_param :suppress_parse_error_log, :bool, :default => false
|
11
12
|
|
12
13
|
def initialize(regexp, conf={})
|
13
14
|
super()
|
@@ -20,7 +21,10 @@ class FluentExt::TextParser
|
|
20
21
|
def call(text)
|
21
22
|
m = @regexp.match(text)
|
22
23
|
unless m
|
23
|
-
|
24
|
+
unless @suppress_parse_error_log
|
25
|
+
$log.warn "pattern not match: #{text}"
|
26
|
+
end
|
27
|
+
|
24
28
|
return nil, nil
|
25
29
|
end
|
26
30
|
|
@@ -70,7 +74,10 @@ class FluentExt::TextParser
|
|
70
74
|
record = Yajl.load(text)
|
71
75
|
return parse_time(record)
|
72
76
|
rescue Yajl::ParseError
|
73
|
-
|
77
|
+
unless @suppress_parse_error_log
|
78
|
+
$log.warn "pattern not match(json): #{text.inspect}: #{$!}"
|
79
|
+
end
|
80
|
+
|
74
81
|
return nil, nil
|
75
82
|
end
|
76
83
|
end
|
@@ -122,7 +129,10 @@ class FluentExt::TextParser
|
|
122
129
|
def call(text)
|
123
130
|
m = REGEXP.match(text)
|
124
131
|
unless m
|
125
|
-
|
132
|
+
unless @suppress_parse_error_log
|
133
|
+
$log.warn "pattern not match: #{text.inspect}"
|
134
|
+
end
|
135
|
+
|
126
136
|
return nil, nil
|
127
137
|
end
|
128
138
|
|
@@ -8,6 +8,7 @@ class Fluent::ParserOutput < Fluent::Output
|
|
8
8
|
config_param :add_prefix, :string, :default => nil
|
9
9
|
config_param :key_name, :string
|
10
10
|
config_param :reserve_data, :bool, :default => false
|
11
|
+
config_param :replace_invalid_sequence, :bool, :default => false
|
11
12
|
|
12
13
|
def initialize
|
13
14
|
super
|
@@ -56,7 +57,7 @@ class Fluent::ParserOutput < Fluent::Output
|
|
56
57
|
es.each {|time,record|
|
57
58
|
value = record[@key_name]
|
58
59
|
t,values = if value
|
59
|
-
|
60
|
+
parse(value)
|
60
61
|
else
|
61
62
|
[nil, nil]
|
62
63
|
end
|
@@ -72,7 +73,7 @@ class Fluent::ParserOutput < Fluent::Output
|
|
72
73
|
es.each {|time,record|
|
73
74
|
value = record[@key_name]
|
74
75
|
t,values = if value
|
75
|
-
|
76
|
+
parse(value)
|
76
77
|
else
|
77
78
|
[nil, nil]
|
78
79
|
end
|
@@ -84,4 +85,27 @@ class Fluent::ParserOutput < Fluent::Output
|
|
84
85
|
end
|
85
86
|
chain.next
|
86
87
|
end
|
88
|
+
|
89
|
+
private
|
90
|
+
|
91
|
+
def parse(string)
|
92
|
+
return @parser.parse(string) unless @replace_invalid_sequence
|
93
|
+
|
94
|
+
begin
|
95
|
+
@parser.parse(string)
|
96
|
+
rescue ArgumentError => e
|
97
|
+
unless e.message.index("invalid byte sequence in") == 0
|
98
|
+
raise
|
99
|
+
end
|
100
|
+
replaced_string = replace_invalid_byte(string)
|
101
|
+
@parser.parse(replaced_string)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
def replace_invalid_byte(string)
|
106
|
+
replace_options = { invalid: :replace, undef: :replace, replace: '?' }
|
107
|
+
original_encoding = string.encoding
|
108
|
+
temporal_encoding = (original_encoding == Encoding::UTF_8 ? Encoding::UTF_16BE : Encoding::UTF_8)
|
109
|
+
string.encode(temporal_encoding, original_encoding, replace_options).encode(original_encoding)
|
110
|
+
end
|
87
111
|
end
|
@@ -67,6 +67,22 @@ class ParserOutputTest < Test::Unit::TestCase
|
|
67
67
|
key_name foo
|
68
68
|
]
|
69
69
|
}
|
70
|
+
assert_nothing_raised {
|
71
|
+
d = create_driver %[
|
72
|
+
tag hogelog
|
73
|
+
format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
|
74
|
+
key_name message
|
75
|
+
suppress_parse_error_log true
|
76
|
+
]
|
77
|
+
}
|
78
|
+
assert_nothing_raised {
|
79
|
+
d = create_driver %[
|
80
|
+
tag hogelog
|
81
|
+
format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
|
82
|
+
key_name message
|
83
|
+
suppress_parse_error_log false
|
84
|
+
]
|
85
|
+
}
|
70
86
|
d = create_driver %[
|
71
87
|
tag foo.bar
|
72
88
|
key_name foo
|
@@ -315,4 +331,136 @@ class ParserOutputTest < Test::Unit::TestCase
|
|
315
331
|
#TODO: apache2
|
316
332
|
# REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
|
317
333
|
|
334
|
+
CONFIG_NOT_REPLACE = %[
|
335
|
+
remove_prefix test
|
336
|
+
key_name data
|
337
|
+
format /^(?<message>.*)$/
|
338
|
+
]
|
339
|
+
CONFIG_INVALID_BYTE = CONFIG_NOT_REPLACE + %[
|
340
|
+
replace_invalid_sequence true
|
341
|
+
]
|
342
|
+
def test_emit_invalid_byte
|
343
|
+
invalid_utf8 = "\xff".force_encoding('UTF-8')
|
344
|
+
|
345
|
+
d = create_driver(CONFIG_NOT_REPLACE, 'test.in')
|
346
|
+
assert_raise(ArgumentError) {
|
347
|
+
d.run do
|
348
|
+
d.emit({'data' => invalid_utf8}, Time.now.to_i)
|
349
|
+
end
|
350
|
+
}
|
351
|
+
|
352
|
+
d = create_driver(CONFIG_INVALID_BYTE, 'test.in')
|
353
|
+
assert_nothing_raised {
|
354
|
+
d.run do
|
355
|
+
d.emit({'data' => invalid_utf8}, Time.now.to_i)
|
356
|
+
end
|
357
|
+
}
|
358
|
+
emits = d.emits
|
359
|
+
assert_equal 1, emits.length
|
360
|
+
assert_nil emits[0][2]['data']
|
361
|
+
assert_equal '?'.force_encoding('UTF-8'), emits[0][2]['message']
|
362
|
+
|
363
|
+
d = create_driver(CONFIG_INVALID_BYTE + %[
|
364
|
+
reserve_data yes
|
365
|
+
], 'test.in')
|
366
|
+
assert_nothing_raised {
|
367
|
+
d.run do
|
368
|
+
d.emit({'data' => invalid_utf8}, Time.now.to_i)
|
369
|
+
end
|
370
|
+
}
|
371
|
+
emits = d.emits
|
372
|
+
assert_equal 1, emits.length
|
373
|
+
assert_equal invalid_utf8, emits[0][2]['data']
|
374
|
+
assert_equal '?'.force_encoding('UTF-8'), emits[0][2]['message']
|
375
|
+
|
376
|
+
invalid_ascii = "\xff".force_encoding('US-ASCII')
|
377
|
+
d = create_driver(CONFIG_INVALID_BYTE, 'test.in')
|
378
|
+
assert_nothing_raised {
|
379
|
+
d.run do
|
380
|
+
d.emit({'data' => invalid_ascii}, Time.now.to_i)
|
381
|
+
end
|
382
|
+
}
|
383
|
+
emits = d.emits
|
384
|
+
assert_equal 1, emits.length
|
385
|
+
assert_nil emits[0][2]['data']
|
386
|
+
assert_equal '?'.force_encoding('US-ASCII'), emits[0][2]['message']
|
387
|
+
end
|
388
|
+
|
389
|
+
# suppress_parse_error_log test
|
390
|
+
CONFIG_DISABELED_SUPPRESS_PARSE_ERROR_LOG = %[
|
391
|
+
tag hogelog
|
392
|
+
format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
|
393
|
+
key_name message
|
394
|
+
suppress_parse_error_log false
|
395
|
+
]
|
396
|
+
CONFIG_ENABELED_SUPPRESS_PARSE_ERROR_LOG = %[
|
397
|
+
tag hogelog
|
398
|
+
format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
|
399
|
+
key_name message
|
400
|
+
suppress_parse_error_log true
|
401
|
+
]
|
402
|
+
CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG = %[
|
403
|
+
tag hogelog
|
404
|
+
format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
|
405
|
+
key_name message
|
406
|
+
]
|
407
|
+
|
408
|
+
INVALID_MESSAGE = 'foo bar'
|
409
|
+
VALID_MESSAGE = 'col1=foo col2=bar'
|
410
|
+
|
411
|
+
# dummy logger that raises an exception whenever warn() is called
|
412
|
+
class DummyLoggerWarnedException < StandardError; end
|
413
|
+
class DummyLogger
|
414
|
+
def warn(message)
|
415
|
+
raise DummyLoggerWarnedException
|
416
|
+
end
|
417
|
+
end
|
418
|
+
|
419
|
+
def test_suppress_parse_error_log
|
420
|
+
# default(disabled) 'suppress_parse_error_log' is not specify
|
421
|
+
d = create_driver(CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
|
422
|
+
|
423
|
+
saved_logger = $log
|
424
|
+
$log = DummyLogger.new
|
425
|
+
|
426
|
+
assert_raise(DummyLoggerWarnedException) {
|
427
|
+
d.run do
|
428
|
+
d.emit({'message' => INVALID_MESSAGE}, Time.now.to_i)
|
429
|
+
end
|
430
|
+
}
|
431
|
+
|
432
|
+
assert_nothing_raised {
|
433
|
+
d.run do
|
434
|
+
d.emit({'message' => VALID_MESSAGE}, Time.now.to_i)
|
435
|
+
end
|
436
|
+
}
|
437
|
+
|
438
|
+
# disabled 'suppress_parse_error_log'
|
439
|
+
d = create_driver(CONFIG_DISABELED_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
|
440
|
+
|
441
|
+
assert_raise(DummyLoggerWarnedException) {
|
442
|
+
d.run do
|
443
|
+
d.emit({'message' => INVALID_MESSAGE}, Time.now.to_i)
|
444
|
+
end
|
445
|
+
}
|
446
|
+
|
447
|
+
assert_nothing_raised {
|
448
|
+
d.run do
|
449
|
+
d.emit({'message' => VALID_MESSAGE}, Time.now.to_i)
|
450
|
+
end
|
451
|
+
}
|
452
|
+
|
453
|
+
# enabled 'suppress_parse_error_log'
|
454
|
+
d = create_driver(CONFIG_ENABELED_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
|
455
|
+
|
456
|
+
assert_nothing_raised {
|
457
|
+
d.run do
|
458
|
+
d.emit({'message' => INVALID_MESSAGE}, Time.now.to_i)
|
459
|
+
d.emit({'message' => VALID_MESSAGE}, Time.now.to_i)
|
460
|
+
end
|
461
|
+
}
|
462
|
+
|
463
|
+
$log = saved_logger
|
464
|
+
end
|
465
|
+
|
318
466
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-02-
|
12
|
+
date: 2013-02-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|