fluent-plugin-parser 0.3.4 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -24
- data/fluent-plugin-parser.gemspec +2 -2
- data/lib/fluent/plugin/out_parser.rb +45 -30
- data/test/custom_parser.rb +39 -0
- data/test/plugin/test_out_parser.rb +37 -25
- data/test/plugin/test_out_parser_for_parsers.rb +285 -0
- metadata +8 -5
- data/lib/fluent/plugin/fixed_parser.rb +0 -280
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1cfb579022231c0bf26eb16a1156a7e9e5aa24b8
|
4
|
+
data.tar.gz: 6b096b93cce0ddf6995e92058e44a46c0ac59b6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9f59e117075d751a7a734b8e4ad15e9ef1e1a0bce142285c4f6e7b534d235009ee207d0918219ec5a3fbfdcb928280abc893957587ff5d8059d519cf0f016469
|
7
|
+
data.tar.gz: e171adea68763ffd821fc34d7ef27298122c2155e8238d461aa563e3cf3395a97c8b18ae5a125e9368222d3c9fd08ac9f1e63bea67063ad6d49f194f00065e9e
|
data/README.md
CHANGED
@@ -34,6 +34,9 @@ Of course, you can use predefined format 'apache' and 'syslog':
|
|
34
34
|
key_name message
|
35
35
|
</match>
|
36
36
|
|
37
|
+
`fluent-plugin-parser` uses parser plugins of Fluentd (and your own customized parser plugin).
|
38
|
+
See document page for more details: http://docs.fluentd.org/articles/parser-plugin-overview
|
39
|
+
|
37
40
|
If you want original attribute-data pair in re-emitted message, specify 'reserve_data':
|
38
41
|
|
39
42
|
<match raw.apache.*>
|
@@ -44,30 +47,6 @@ If you want original attribute-data pair in re-emitted message, specify 'reserve
|
|
44
47
|
reserve_data yes
|
45
48
|
</match>
|
46
49
|
|
47
|
-
Format 'json', 'csv' and 'tsv' is also supported:
|
48
|
-
|
49
|
-
<match raw.sales.*>
|
50
|
-
type parser
|
51
|
-
tag sales
|
52
|
-
format json
|
53
|
-
key_name sales
|
54
|
-
</match>
|
55
|
-
|
56
|
-
Format 'ltsv'(Labeled-TSV (Tab separated values)) is also supported:
|
57
|
-
|
58
|
-
<match raw.sales.*>
|
59
|
-
type parser
|
60
|
-
tag sales
|
61
|
-
format ltsv
|
62
|
-
key_name sales
|
63
|
-
</match>
|
64
|
-
|
65
|
-
'LTSV' is format like below, unlinke json, easy to write with simple formatter (ex: LogFormat of apache):
|
66
|
-
|
67
|
-
KEY1:VALUE1 [TAB] KEY2:VALUE2 [TAB] ...
|
68
|
-
|
69
|
-
About LTSV, see: http://ltsv.org/
|
70
|
-
|
71
50
|
If you want to suppress 'pattern not match' log, specify 'suppress\_parse\_error\_log true' to configuration.
|
72
51
|
default value is false.
|
73
52
|
|
data/fluent-plugin-parser.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |gem|
|
3
3
|
gem.name = "fluent-plugin-parser"
|
4
|
-
gem.version = "0.3.4"
|
4
|
+
gem.version = "0.4.0"
|
5
5
|
gem.authors = ["TAGOMORI Satoshi"]
|
6
6
|
gem.email = ["tagomoris@gmail.com"]
|
7
7
|
gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
|
@@ -15,5 +15,5 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.require_paths = ["lib"]
|
16
16
|
|
17
17
|
gem.add_development_dependency "rake"
|
18
|
-
gem.add_runtime_dependency "fluentd"
|
18
|
+
gem.add_runtime_dependency "fluentd", ">= 0.10.54"
|
19
19
|
end
|
data/lib/fluent/plugin/out_parser.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
require 'fluent/parser'
|
2
2
|
|
3
3
|
class Fluent::ParserOutput < Fluent::Output
|
4
4
|
Fluent::Plugin.register_output('parser', self)
|
@@ -11,6 +11,8 @@ class Fluent::ParserOutput < Fluent::Output
|
|
11
11
|
config_param :inject_key_prefix, :string, :default => nil
|
12
12
|
config_param :replace_invalid_sequence, :bool, :default => false
|
13
13
|
config_param :hash_value_field, :string, :default => nil
|
14
|
+
config_param :suppress_parse_error_log, :bool, :default => false
|
15
|
+
config_param :time_parse, :bool, :default => true
|
14
16
|
|
15
17
|
attr_reader :parser
|
16
18
|
|
@@ -19,11 +21,6 @@ class Fluent::ParserOutput < Fluent::Output
|
|
19
21
|
require 'time'
|
20
22
|
end
|
21
23
|
|
22
|
-
# Define `log` method for v0.10.42 or earlier
|
23
|
-
unless method_defined?(:log)
|
24
|
-
define_method("log") { $log }
|
25
|
-
end
|
26
|
-
|
27
24
|
def configure(conf)
|
28
25
|
super
|
29
26
|
|
@@ -41,8 +38,15 @@ class Fluent::ParserOutput < Fluent::Output
|
|
41
38
|
@added_prefix_string = @add_prefix + '.'
|
42
39
|
end
|
43
40
|
|
44
|
-
@parser =
|
41
|
+
@parser = Fluent::TextParser.new
|
42
|
+
@parser.estimate_current_event = false
|
45
43
|
@parser.configure(conf)
|
44
|
+
if !@time_parse && @parser.parser.respond_to?("time_key=".to_sym)
|
45
|
+
# disable parse time
|
46
|
+
@parser.parser.time_key = nil
|
47
|
+
end
|
48
|
+
|
49
|
+
self
|
46
50
|
end
|
47
51
|
|
48
52
|
def emit(tag, es, chain)
|
@@ -64,18 +68,28 @@ class Fluent::ParserOutput < Fluent::Output
|
|
64
68
|
end
|
65
69
|
es.each do |time,record|
|
66
70
|
raw_value = record[@key_name]
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
71
|
+
begin
|
72
|
+
@parser.parse(raw_value) do |t,values|
|
73
|
+
t ||= time
|
74
|
+
handle_parsed(tag, record, t, values)
|
75
|
+
end
|
76
|
+
rescue Fluent::TextParser::ParserError => e
|
77
|
+
log.warn e.message unless @suppress_parse_error_log
|
78
|
+
rescue ArgumentError => e
|
79
|
+
if @replace_invalid_sequence
|
80
|
+
unless e.message.index("invalid byte sequence in") == 0
|
81
|
+
raise
|
82
|
+
end
|
83
|
+
replaced_string = replace_invalid_byte(raw_value)
|
84
|
+
@parser.parse(replaced_string) do |t,values|
|
85
|
+
t ||= time
|
86
|
+
handle_parsed(tag, record, t, values)
|
87
|
+
end
|
88
|
+
else
|
89
|
+
raise
|
90
|
+
end
|
91
|
+
rescue => e
|
92
|
+
log.warn "parse failed #{e.message}" unless @suppress_parse_error_log
|
79
93
|
end
|
80
94
|
end
|
81
95
|
|
@@ -84,17 +98,18 @@ class Fluent::ParserOutput < Fluent::Output
|
|
84
98
|
|
85
99
|
private
|
86
100
|
|
87
|
-
def
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
101
|
+
def handle_parsed(tag, record, t, values)
|
102
|
+
if values && @inject_key_prefix
|
103
|
+
values = Hash[values.map{|k,v| [ @inject_key_prefix + k, v ]}]
|
104
|
+
end
|
105
|
+
r = @hash_value_field ? {@hash_value_field => values} : values
|
106
|
+
if @reserve_data
|
107
|
+
r = r ? record.merge(r) : record
|
108
|
+
end
|
109
|
+
if r
|
110
|
+
Fluent::Engine.emit(tag, t, r)
|
111
|
+
else
|
112
|
+
log.warn "pattern not match #{raw_value}" unless @suppress_parse_error_log
|
98
113
|
end
|
99
114
|
end
|
100
115
|
|
data/test/custom_parser.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
module Fluent
|
2
|
+
class TextParser
|
3
|
+
class KVPairParser
|
4
|
+
# key<delim1>value is pair and <pair><delim2><pair> ...
|
5
|
+
# newline splits records
|
6
|
+
include Configurable
|
7
|
+
|
8
|
+
config_param :delim1, :string
|
9
|
+
config_param :delim2, :string
|
10
|
+
|
11
|
+
config_param :time_key, :string, :default => "time"
|
12
|
+
config_param :time_format, :string, :default => nil # time_format is configurable
|
13
|
+
|
14
|
+
def configure(conf)
|
15
|
+
super
|
16
|
+
@time_parser = TimeParser.new(@time_format)
|
17
|
+
end
|
18
|
+
|
19
|
+
def call(text)
|
20
|
+
text.split("\n").each do |line|
|
21
|
+
pairs = text.split(@delim2)
|
22
|
+
record = {}
|
23
|
+
time = nil
|
24
|
+
pairs.each do |pair|
|
25
|
+
k, v = pair.split(@delim1, 2)
|
26
|
+
if k == @time_key
|
27
|
+
time = @time_parser.parse(v)
|
28
|
+
else
|
29
|
+
record[k] = v
|
30
|
+
end
|
31
|
+
end
|
32
|
+
yield time, record
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
register_template("kv_pair", Proc.new { KVPairParser.new })
|
38
|
+
end
|
39
|
+
end
|
data/test/plugin/test_out_parser.rb
CHANGED
@@ -304,6 +304,26 @@ class ParserOutputTest < Test::Unit::TestCase
|
|
304
304
|
assert_equal "xxx:first\tyyy:second", first[2]['data']
|
305
305
|
assert_equal 'first', second[2]['xxx']
|
306
306
|
assert_equal 'second2', second[2]['yyy']
|
307
|
+
|
308
|
+
# convert types
|
309
|
+
d = create_driver(CONFIG_LTSV + %[
|
310
|
+
types i:integer,s:string,f:float,b:bool
|
311
|
+
], 'foo.baz.test')
|
312
|
+
time = Time.parse("2012-04-02 18:20:59").to_i
|
313
|
+
d.run do
|
314
|
+
d.emit({'data' => "i:1\ts:2\tf:3\tb:true\tx:123"}, time)
|
315
|
+
end
|
316
|
+
emits = d.emits
|
317
|
+
assert_equal 1, emits.length
|
318
|
+
|
319
|
+
first = emits[0]
|
320
|
+
assert_equal 'foo.bar.test', first[0]
|
321
|
+
assert_equal time, first[1]
|
322
|
+
assert_equal 1, first[2]['i']
|
323
|
+
assert_equal '2', first[2]['s']
|
324
|
+
assert_equal 3.0, first[2]['f']
|
325
|
+
assert_equal true, first[2]['b']
|
326
|
+
assert_equal '123', first[2]['x']
|
307
327
|
end
|
308
328
|
|
309
329
|
CONFIG_TSV = %[
|
@@ -473,8 +493,6 @@ class ParserOutputTest < Test::Unit::TestCase
|
|
473
493
|
t = Time.now.to_i
|
474
494
|
d = create_driver(CONFIG_DONT_PARSE_TIME, 'test.in')
|
475
495
|
|
476
|
-
assert_equal false, d.instance.instance_eval{ @parser }.instance_eval{ @parser }.time_parse
|
477
|
-
|
478
496
|
d.run do
|
479
497
|
d.emit({'data' => '{"time":1383190430, "f1":"v1"}'}, t)
|
480
498
|
d.emit({'data' => '{"time":"1383190430", "f1":"v1"}'}, t)
|
@@ -515,21 +533,14 @@ class ParserOutputTest < Test::Unit::TestCase
|
|
515
533
|
end
|
516
534
|
}
|
517
535
|
emits = d.emits
|
518
|
-
assert_equal
|
536
|
+
assert_equal 1, emits.length
|
519
537
|
|
520
538
|
assert_equal 'in', emits[0][0]
|
521
|
-
assert_equal
|
539
|
+
assert_equal 0, emits[0][1]
|
522
540
|
assert_equal 'v1', emits[0][2]['f1']
|
523
|
-
assert_equal
|
524
|
-
|
525
|
-
assert_equal 'in', emits[1][0]
|
526
|
-
assert_equal t, emits[1][1]
|
527
|
-
assert_equal 'v1', emits[1][2]['f1']
|
528
|
-
assert_equal 'thisisnottime', emits[1][2]['time']
|
541
|
+
assert_equal 0, emits[0][2]['time'].to_i
|
529
542
|
end
|
530
543
|
|
531
|
-
|
532
|
-
#TODO: apache2
|
533
544
|
# REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
|
534
545
|
|
535
546
|
CONFIG_NOT_REPLACE = %[
|
@@ -619,26 +630,27 @@ class ParserOutputTest < Test::Unit::TestCase
|
|
619
630
|
|
620
631
|
def swap_logger(instance)
|
621
632
|
raise "use with block" unless block_given?
|
622
|
-
parser_logger = instance.parser.log
|
623
633
|
dummy = DummyLogger.new
|
624
|
-
instance.
|
625
|
-
instance.
|
626
|
-
|
627
|
-
restore = if instance.respond_to?("log=".to_sym)
|
628
|
-
saved_logger = instance.log
|
629
|
-
instance.log = dummy
|
630
|
-
lambda{ instance.log = saved_logger; instance.parser.log = instance.parser.parser.log = parser_logger }
|
631
|
-
else
|
632
|
-
saved_logger = $log
|
633
|
-
$log = dummy
|
634
|
-
lambda{ $log = saved_logger; instance.parser.log = instance.parser.parser.log = parser_logger }
|
635
|
-
end
|
634
|
+
saved_logger = instance.log
|
635
|
+
instance.log = dummy
|
636
|
+
restore = lambda{ instance.log = saved_logger }
|
636
637
|
|
637
638
|
yield
|
638
639
|
|
639
640
|
restore.call
|
640
641
|
end
|
641
642
|
|
643
|
+
def test_parser_error_warning
|
644
|
+
d = create_driver(CONFIG_INVALID_TIME_VALUE, 'test.in')
|
645
|
+
swap_logger(d.instance) do
|
646
|
+
assert_raise(DummyLoggerWarnedException) {
|
647
|
+
d.run do
|
648
|
+
d.emit({'data' => '{"time":[], "f1":"v1"}'}, Time.now.to_i)
|
649
|
+
end
|
650
|
+
}
|
651
|
+
end
|
652
|
+
end
|
653
|
+
|
642
654
|
def test_suppress_parse_error_log
|
643
655
|
# default(disabled) 'suppress_parse_error_log' is not specify
|
644
656
|
d = create_driver(CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
|
data/test/plugin/test_out_parser_for_parsers.rb
ADDED
@@ -0,0 +1,285 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require_relative '../custom_parser'
|
3
|
+
|
4
|
+
class ParserOutputParsersTest < Test::Unit::TestCase
|
5
|
+
def setup
|
6
|
+
Fluent::Test.setup
|
7
|
+
end
|
8
|
+
|
9
|
+
def create_driver(conf, tag)
|
10
|
+
Fluent::Test::OutputTestDriver.new(Fluent::ParserOutput, tag).configure(conf)
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_regexp_parser
|
14
|
+
# exists in test_out_parser
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_json_parser
|
18
|
+
# exists in test_out_parser
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_tsv_parser
|
22
|
+
# exists in test_out_parser
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_ltsv_parser
|
26
|
+
# exists in test_out_parser
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_csv_parser
|
30
|
+
# exists in test_out_parser
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_none_parser
|
34
|
+
d = create_driver(<<EOF, 'test.in')
|
35
|
+
remove_prefix test
|
36
|
+
add_prefix parsed
|
37
|
+
key_name message
|
38
|
+
format none
|
39
|
+
EOF
|
40
|
+
time = Time.parse("2014-11-05 15:59:30").to_i
|
41
|
+
d.run do
|
42
|
+
d.emit({"message" => "aaaa bbbb cccc 1"}, time)
|
43
|
+
d.emit({"message" => "aaaa bbbb cccc 2"}, time)
|
44
|
+
d.emit({"message" => "aaaa bbbb cccc 3"}, time)
|
45
|
+
d.emit({"message" => "aaaa bbbb cccc 4"}, time)
|
46
|
+
end
|
47
|
+
|
48
|
+
e = d.emits
|
49
|
+
assert_equal 4, e.length
|
50
|
+
|
51
|
+
assert_equal 'parsed.in', e[0][0]
|
52
|
+
assert_equal time, e[0][1]
|
53
|
+
assert_equal 'aaaa bbbb cccc 1', e[0][2]['message']
|
54
|
+
|
55
|
+
assert_equal 'parsed.in', e[1][0]
|
56
|
+
assert_equal time, e[1][1]
|
57
|
+
assert_equal 'aaaa bbbb cccc 2', e[1][2]['message']
|
58
|
+
|
59
|
+
assert_equal 'parsed.in', e[2][0]
|
60
|
+
assert_equal time, e[2][1]
|
61
|
+
assert_equal 'aaaa bbbb cccc 3', e[2][2]['message']
|
62
|
+
|
63
|
+
assert_equal 'parsed.in', e[3][0]
|
64
|
+
assert_equal time, e[3][1]
|
65
|
+
assert_equal 'aaaa bbbb cccc 4', e[3][2]['message']
|
66
|
+
end
|
67
|
+
|
68
|
+
def test_apache_parser
|
69
|
+
log1 = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326'
|
70
|
+
log2 = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)"'
|
71
|
+
log_time = Time.parse("2000-10-10 13:55:36 -0700").to_i
|
72
|
+
|
73
|
+
d = create_driver(<<EOF, 'test.in')
|
74
|
+
remove_prefix test
|
75
|
+
add_prefix parsed
|
76
|
+
key_name message
|
77
|
+
format apache
|
78
|
+
EOF
|
79
|
+
time = Time.parse("2014-11-05 15:59:30").to_i
|
80
|
+
d.run do
|
81
|
+
d.emit({"message" => log1}, time)
|
82
|
+
d.emit({"message" => log2}, time)
|
83
|
+
end
|
84
|
+
|
85
|
+
e = d.emits
|
86
|
+
assert_equal 2, e.length
|
87
|
+
|
88
|
+
assert_equal 'parsed.in', e[0][0]
|
89
|
+
assert_equal log_time, e[0][1]
|
90
|
+
assert_equal '127.0.0.1', e[0][2]['host']
|
91
|
+
assert_equal 'frank', e[0][2]['user']
|
92
|
+
assert_equal 'GET', e[0][2]['method']
|
93
|
+
assert_equal '/apache_pb.gif', e[0][2]['path']
|
94
|
+
assert_equal '200', e[0][2]['code']
|
95
|
+
assert_equal '2326', e[0][2]['size']
|
96
|
+
assert_nil e[0][2]['referer']
|
97
|
+
assert_nil e[0][2]['agent']
|
98
|
+
|
99
|
+
assert_equal 'parsed.in', e[1][0]
|
100
|
+
assert_equal log_time, e[1][1]
|
101
|
+
assert_equal '127.0.0.1', e[1][2]['host']
|
102
|
+
assert_equal 'frank', e[1][2]['user']
|
103
|
+
assert_equal 'GET', e[1][2]['method']
|
104
|
+
assert_equal '/apache_pb.gif', e[1][2]['path']
|
105
|
+
assert_equal '200', e[1][2]['code']
|
106
|
+
assert_equal '2326', e[1][2]['size']
|
107
|
+
assert_equal 'http://www.example.com/start.html', e[1][2]['referer']
|
108
|
+
assert_equal 'Mozilla/4.08 [en] (Win98; I ;Nav)', e[1][2]['agent']
|
109
|
+
end
|
110
|
+
|
111
|
+
def test_apache_parser_with_types
|
112
|
+
log = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)"'
|
113
|
+
log_time = Time.parse("2000-10-10 13:55:36 -0700").to_i
|
114
|
+
|
115
|
+
d = create_driver(<<EOF, 'test.in')
|
116
|
+
remove_prefix test
|
117
|
+
add_prefix parsed
|
118
|
+
key_name message
|
119
|
+
format apache
|
120
|
+
types code:integer,size:integer
|
121
|
+
EOF
|
122
|
+
time = Time.parse("2014-11-05 15:59:30").to_i
|
123
|
+
d.run do
|
124
|
+
d.emit({"message" => log}, time)
|
125
|
+
end
|
126
|
+
|
127
|
+
e = d.emits
|
128
|
+
assert_equal 1, e.length
|
129
|
+
|
130
|
+
assert_equal 'parsed.in', e[0][0]
|
131
|
+
assert_equal log_time, e[0][1]
|
132
|
+
assert_equal '127.0.0.1', e[0][2]['host']
|
133
|
+
assert_equal 'frank', e[0][2]['user']
|
134
|
+
assert_equal 'GET', e[0][2]['method']
|
135
|
+
assert_equal '/apache_pb.gif', e[0][2]['path']
|
136
|
+
assert_equal 200, e[0][2]['code']
|
137
|
+
assert_equal 2326, e[0][2]['size']
|
138
|
+
assert_equal 'http://www.example.com/start.html', e[0][2]['referer']
|
139
|
+
assert_equal 'Mozilla/4.08 [en] (Win98; I ;Nav)', e[0][2]['agent']
|
140
|
+
end
|
141
|
+
|
142
|
+
def test_syslog_parser
|
143
|
+
loglines = <<LOGS
|
144
|
+
Nov 5 16:19:48 myhost.local netbiosd[50]: name servers down?
|
145
|
+
Nov 5 16:21:20 myhost.local coreaudiod[320]: Disabled automatic stack shots because audio IO is active
|
146
|
+
Nov 5 16:21:20 myhost.local coreaudiod[320]: Enabled automatic stack shots because audio IO is inactive
|
147
|
+
LOGS
|
148
|
+
logs = loglines.split("\n").reject(&:empty?)
|
149
|
+
|
150
|
+
d = create_driver(<<EOF, 'test.in')
|
151
|
+
remove_prefix test
|
152
|
+
add_prefix parsed
|
153
|
+
key_name message
|
154
|
+
format syslog
|
155
|
+
EOF
|
156
|
+
time = Time.parse("2014-11-05 15:59:30").to_i
|
157
|
+
d.run do
|
158
|
+
d.emit({"message" => logs[0]}, time)
|
159
|
+
d.emit({"message" => logs[1]}, time)
|
160
|
+
d.emit({"message" => logs[2]}, time)
|
161
|
+
end
|
162
|
+
|
163
|
+
emits = d.emits
|
164
|
+
assert_equal 3, emits.length
|
165
|
+
|
166
|
+
e = emits[0]
|
167
|
+
assert_equal 'parsed.in', e[0]
|
168
|
+
assert_equal Time.parse("2014-11-05 16:19:48").to_i, e[1]
|
169
|
+
r = e[2]
|
170
|
+
assert_equal 'myhost.local', r['host']
|
171
|
+
assert_equal 'netbiosd', r['ident']
|
172
|
+
assert_equal '50', r['pid']
|
173
|
+
assert_equal 'name servers down?', r['message']
|
174
|
+
|
175
|
+
e = emits[1]
|
176
|
+
assert_equal 'parsed.in', e[0]
|
177
|
+
assert_equal Time.parse("2014-11-05 16:21:20").to_i, e[1]
|
178
|
+
r = e[2]
|
179
|
+
assert_equal 'myhost.local', r['host']
|
180
|
+
assert_equal 'coreaudiod', r['ident']
|
181
|
+
assert_equal '320', r['pid']
|
182
|
+
assert_equal 'Disabled automatic stack shots because audio IO is active', r['message']
|
183
|
+
|
184
|
+
e = emits[2]
|
185
|
+
assert_equal 'parsed.in', e[0]
|
186
|
+
assert_equal Time.parse("2014-11-05 16:21:20").to_i, e[1]
|
187
|
+
r = e[2]
|
188
|
+
assert_equal 'myhost.local', r['host']
|
189
|
+
assert_equal 'coreaudiod', r['ident']
|
190
|
+
assert_equal '320', r['pid']
|
191
|
+
assert_equal 'Enabled automatic stack shots because audio IO is inactive', r['message']
|
192
|
+
end
|
193
|
+
|
194
|
+
def x_test_multiline_parser
|
195
|
+
# I can't configure this format well...
|
196
|
+
log1 = <<LOG
|
197
|
+
*** 2014/11/05 16:33:01 -0700
|
198
|
+
host: myhost
|
199
|
+
port: 2048
|
200
|
+
message: first line
|
201
|
+
LOG
|
202
|
+
log2 = <<LOG
|
203
|
+
*** 2014/11/05 16:33:02 +0900
|
204
|
+
host: myhost
|
205
|
+
port: 2049
|
206
|
+
message: second line
|
207
|
+
LOG
|
208
|
+
log3 = <<LOG
|
209
|
+
*** 2014/11/05 16:43:11 +1100
|
210
|
+
LOG
|
211
|
+
d = create_driver(<<'EOF', 'test.in')
|
212
|
+
remove_prefix test
|
213
|
+
add_prefix parsed
|
214
|
+
key_name message
|
215
|
+
format multiline
|
216
|
+
time_format %Y/%m/%d %H:%M:%S %z
|
217
|
+
format_firstline /^\*\*\* /
|
218
|
+
format1 /\*\*\* (?<time>\d{4}/\d\d/\d\d/ \d\d:\d\d:\d\d [-+]\d{4})/
|
219
|
+
format2 /\s*host: (?<host>[^\s]+)/
|
220
|
+
format3 /\s*port: (?<port>\d+)/
|
221
|
+
format4 /\s*message: (?<message>[^ ]*)/
|
222
|
+
EOF
|
223
|
+
time = Time.parse("2014-11-05 15:59:30").to_i
|
224
|
+
d.run do
|
225
|
+
d.emit({"message" => log1}, time)
|
226
|
+
d.emit({"message" => log2}, time)
|
227
|
+
d.emit({"message" => log3}, time)
|
228
|
+
end
|
229
|
+
|
230
|
+
emits = d.emits
|
231
|
+
assert_equal 2, emits.length
|
232
|
+
|
233
|
+
e = emits[0]
|
234
|
+
assert_equal 'parsed.in', e[0]
|
235
|
+
assert_equal Time.parse("2014-11-05 16:33:01 -0700").to_i, e[1]
|
236
|
+
r = e[2]
|
237
|
+
assert_equal 'myhost', r['host']
|
238
|
+
assert_equal '2048', r['port']
|
239
|
+
assert_equal 'first line', r['message']
|
240
|
+
|
241
|
+
e = emits[1]
|
242
|
+
assert_equal 'parsed.in', e[0]
|
243
|
+
assert_equal Time.parse("2014-11-05 16:33:02 +0900").to_i, e[1]
|
244
|
+
r = e[2]
|
245
|
+
assert_equal 'myhost', r['host']
|
246
|
+
assert_equal '2049', r['port']
|
247
|
+
assert_equal 'second line', r['message']
|
248
|
+
end
|
249
|
+
|
250
|
+
def test_custom_parser
|
251
|
+
d = create_driver(<<'EOF', 'test.in')
|
252
|
+
remove_prefix test
|
253
|
+
add_prefix parsed
|
254
|
+
key_name message
|
255
|
+
format kv_pair
|
256
|
+
time_format %Y-%m-%d %H:%M:%S %z
|
257
|
+
delim1 :
|
258
|
+
delim2 ,
|
259
|
+
EOF
|
260
|
+
time = Time.parse("2014-11-05 15:59:30").to_i
|
261
|
+
d.run do
|
262
|
+
d.emit({"message" => "k1:v1,k2:v2,k3:1,time:2014-11-05 00:00:00 +0000"}, time)
|
263
|
+
d.emit({"message" => "k1:v1,k2:v2,k3:2"}, time) # original time is used
|
264
|
+
d.emit({"message" => "k1:v1,k2:v2,k3:3,time:2014-11-05 00:00:00"}, time) # time parse error -> not emitted
|
265
|
+
end
|
266
|
+
emits = d.emits
|
267
|
+
assert_equal 2, emits.length
|
268
|
+
|
269
|
+
e = emits[0]
|
270
|
+
assert_equal 'parsed.in', e[0]
|
271
|
+
assert_equal Time.parse("2014-11-05 00:00:00 +0000").to_i, e[1]
|
272
|
+
r = e[2]
|
273
|
+
assert_equal 'v1', r['k1']
|
274
|
+
assert_equal 'v2', r['k2']
|
275
|
+
assert_equal '1', r['k3']
|
276
|
+
|
277
|
+
e = emits[1]
|
278
|
+
assert_equal 'parsed.in', e[0]
|
279
|
+
assert_equal Time.parse("2014-11-05 15:59:30").to_i, e[1]
|
280
|
+
r = e[2]
|
281
|
+
assert_equal 'v1', r['k1']
|
282
|
+
assert_equal 'v2', r['k2']
|
283
|
+
assert_equal '2', r['k3']
|
284
|
+
end
|
285
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.4
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 0.10.54
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 0.10.54
|
41
41
|
description: fluentd plugin to parse single field, or to combine log structure into
|
42
42
|
single field
|
43
43
|
email:
|
@@ -53,12 +53,13 @@ files:
|
|
53
53
|
- README.md
|
54
54
|
- Rakefile
|
55
55
|
- fluent-plugin-parser.gemspec
|
56
|
-
- lib/fluent/plugin/fixed_parser.rb
|
57
56
|
- lib/fluent/plugin/out_deparser.rb
|
58
57
|
- lib/fluent/plugin/out_parser.rb
|
58
|
+
- test/custom_parser.rb
|
59
59
|
- test/helper.rb
|
60
60
|
- test/plugin/test_deparser.rb
|
61
61
|
- test/plugin/test_out_parser.rb
|
62
|
+
- test/plugin/test_out_parser_for_parsers.rb
|
62
63
|
homepage: https://github.com/tagomoris/fluent-plugin-parser
|
63
64
|
licenses:
|
64
65
|
- APLv2
|
@@ -84,6 +85,8 @@ signing_key:
|
|
84
85
|
specification_version: 4
|
85
86
|
summary: plugin to parse/combine fluentd log messages
|
86
87
|
test_files:
|
88
|
+
- test/custom_parser.rb
|
87
89
|
- test/helper.rb
|
88
90
|
- test/plugin/test_deparser.rb
|
89
91
|
- test/plugin/test_out_parser.rb
|
92
|
+
- test/plugin/test_out_parser_for_parsers.rb
|
data/lib/fluent/plugin/fixed_parser.rb
REMOVED
@@ -1,280 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# This module is copied from fluentd/lib/fluent/parser.rb and
|
3
|
-
# fixed not to overwrite 'time' (reserve nil) when time not found in parsed string.
|
4
|
-
module FluentExt; end
|
5
|
-
|
6
|
-
class FluentExt::TextParser
|
7
|
-
class GenericParser
|
8
|
-
include Fluent::Configurable
|
9
|
-
|
10
|
-
config_param :time_key, :string, :default => 'time'
|
11
|
-
config_param :time_format, :string, :default => nil
|
12
|
-
config_param :time_parse, :bool, :default => true
|
13
|
-
|
14
|
-
attr_accessor :log
|
15
|
-
|
16
|
-
def initialize
|
17
|
-
super
|
18
|
-
|
19
|
-
@cache1_key = nil
|
20
|
-
@cache1_time = nil
|
21
|
-
@cache2_key = nil
|
22
|
-
@cache2_time = nil
|
23
|
-
|
24
|
-
@log = nil
|
25
|
-
end
|
26
|
-
|
27
|
-
def parse_time(record)
|
28
|
-
time = nil
|
29
|
-
|
30
|
-
unless @time_parse
|
31
|
-
return time, record
|
32
|
-
end
|
33
|
-
|
34
|
-
if value = record.delete(@time_key)
|
35
|
-
if @cache1_key == value
|
36
|
-
time = @cache1_time
|
37
|
-
elsif @cache2_key == value
|
38
|
-
time = @cache2_time
|
39
|
-
else
|
40
|
-
begin
|
41
|
-
time = if @time_format
|
42
|
-
Time.strptime(value, @time_format).to_i
|
43
|
-
else
|
44
|
-
Time.parse(value).to_i
|
45
|
-
end
|
46
|
-
@cache1_key = @cache2_key
|
47
|
-
@cache1_time = @cache2_time
|
48
|
-
@cache2_key = value
|
49
|
-
@cache2_time = time
|
50
|
-
rescue TypeError, ArgumentError => e
|
51
|
-
@log.warn "Failed to parse time", :key => @time_key, :value => value
|
52
|
-
record[@time_key] = value
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
return time, record
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
class RegexpParser < GenericParser
|
62
|
-
include Fluent::Configurable
|
63
|
-
|
64
|
-
config_param :suppress_parse_error_log, :bool, :default => false
|
65
|
-
|
66
|
-
def initialize(regexp, conf={})
|
67
|
-
super()
|
68
|
-
@regexp = regexp
|
69
|
-
unless conf.empty?
|
70
|
-
configure(conf)
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def call(text)
|
75
|
-
m = @regexp.match(text)
|
76
|
-
unless m
|
77
|
-
unless @suppress_parse_error_log
|
78
|
-
@log.warn "pattern not match: #{text}"
|
79
|
-
end
|
80
|
-
|
81
|
-
return nil, nil
|
82
|
-
end
|
83
|
-
|
84
|
-
record = {}
|
85
|
-
m.names.each {|name|
|
86
|
-
record[name] = m[name] if m[name]
|
87
|
-
}
|
88
|
-
parse_time(record)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
class JSONParser < GenericParser
|
93
|
-
def call(text)
|
94
|
-
record = Yajl.load(text)
|
95
|
-
return parse_time(record)
|
96
|
-
rescue Yajl::ParseError
|
97
|
-
unless @suppress_parse_error_log
|
98
|
-
@log.warn "pattern not match(json): #{text.inspect}: #{$!}"
|
99
|
-
end
|
100
|
-
|
101
|
-
return nil, nil
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
class LabeledTSVParser < GenericParser
|
106
|
-
def call(text)
|
107
|
-
record = Hash[text.split("\t").map{|p| p.split(":", 2)}]
|
108
|
-
parse_time(record)
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
|
-
class ValuesParser < GenericParser
|
113
|
-
config_param :keys, :string
|
114
|
-
|
115
|
-
def configure(conf)
|
116
|
-
super
|
117
|
-
@keys = @keys.split(",")
|
118
|
-
end
|
119
|
-
|
120
|
-
def values_map(values)
|
121
|
-
Hash[@keys.zip(values)]
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
class TSVParser < ValuesParser
|
126
|
-
config_param :delimiter, :string, :default => "\t"
|
127
|
-
|
128
|
-
def call(text)
|
129
|
-
return parse_time(values_map(text.split(@delimiter)))
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
class CSVParser < ValuesParser
|
134
|
-
def initialize
|
135
|
-
super
|
136
|
-
require 'csv'
|
137
|
-
end
|
138
|
-
|
139
|
-
def call(text)
|
140
|
-
return parse_time(values_map(CSV.parse_line(text)))
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
class ApacheParser < GenericParser
|
145
|
-
include Fluent::Configurable
|
146
|
-
|
147
|
-
REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
|
148
|
-
|
149
|
-
def initialize
|
150
|
-
super
|
151
|
-
|
152
|
-
@time_key = "time"
|
153
|
-
@time_format = "%d/%b/%Y:%H:%M:%S %z"
|
154
|
-
end
|
155
|
-
|
156
|
-
def call(text)
|
157
|
-
m = REGEXP.match(text)
|
158
|
-
unless m
|
159
|
-
unless @suppress_parse_error_log
|
160
|
-
@log.warn "pattern not match: #{text.inspect}"
|
161
|
-
end
|
162
|
-
|
163
|
-
return nil, nil
|
164
|
-
end
|
165
|
-
|
166
|
-
host = m['host']
|
167
|
-
host = (host == '-') ? nil : host
|
168
|
-
|
169
|
-
user = m['user']
|
170
|
-
user = (user == '-') ? nil : user
|
171
|
-
|
172
|
-
time = m['time']
|
173
|
-
|
174
|
-
method = m['method']
|
175
|
-
path = m['path']
|
176
|
-
|
177
|
-
code = m['code'].to_i
|
178
|
-
code = nil if code == 0
|
179
|
-
|
180
|
-
size = m['size']
|
181
|
-
size = (size == '-') ? nil : size.to_i
|
182
|
-
|
183
|
-
referer = m['referer']
|
184
|
-
referer = (referer == '-') ? nil : referer
|
185
|
-
|
186
|
-
agent = m['agent']
|
187
|
-
agent = (agent == '-') ? nil : agent
|
188
|
-
|
189
|
-
record = {
|
190
|
-
"time" => time,
|
191
|
-
"host" => host,
|
192
|
-
"user" => user,
|
193
|
-
"method" => method,
|
194
|
-
"path" => path,
|
195
|
-
"code" => code,
|
196
|
-
"size" => size,
|
197
|
-
"referer" => referer,
|
198
|
-
"agent" => agent,
|
199
|
-
}
|
200
|
-
|
201
|
-
parse_time(record)
|
202
|
-
end
|
203
|
-
end
|
204
|
-
|
205
|
-
TEMPLATE_FACTORIES = {
|
206
|
-
'apache' => Proc.new { RegexpParser.new(/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}) },
|
207
|
-
'apache2' => Proc.new { ApacheParser.new },
|
208
|
-
'nginx' => Proc.new { RegexpParser.new(/^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}) },
|
209
|
-
'syslog' => Proc.new { RegexpParser.new(/^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: *(?<message>.*)$/, {'time_format'=>"%b %d %H:%M:%S"}) },
|
210
|
-
'json' => Proc.new { JSONParser.new },
|
211
|
-
'csv' => Proc.new { CSVParser.new },
|
212
|
-
'tsv' => Proc.new { TSVParser.new },
|
213
|
-
'ltsv' => Proc.new { LabeledTSVParser.new },
|
214
|
-
}
|
215
|
-
|
216
|
-
def self.register_template(name, regexp_or_proc, time_format=nil)
|
217
|
-
|
218
|
-
factory = if regexp_or_proc.is_a?(Regexp)
|
219
|
-
regexp = regexp_or_proc
|
220
|
-
Proc.new { RegexpParser.new(regexp, {'time_format'=>time_format}) }
|
221
|
-
else
|
222
|
-
Proc.new { proc }
|
223
|
-
end
|
224
|
-
TEMPLATE_FACTORIES[name] = factory
|
225
|
-
end
|
226
|
-
|
227
|
-
attr_accessor :log
|
228
|
-
attr_reader :parser
|
229
|
-
|
230
|
-
def initialize(logger)
|
231
|
-
@log = logger
|
232
|
-
@parser = nil
|
233
|
-
end
|
234
|
-
|
235
|
-
def configure(conf, required=true)
|
236
|
-
format = conf['format']
|
237
|
-
|
238
|
-
if format == nil
|
239
|
-
if required
|
240
|
-
raise Fluent::ConfigError, "'format' parameter is required"
|
241
|
-
else
|
242
|
-
return nil
|
243
|
-
end
|
244
|
-
end
|
245
|
-
|
246
|
-
if format[0] == ?/ && format[format.length-1] == ?/
|
247
|
-
# regexp
|
248
|
-
begin
|
249
|
-
regexp = Regexp.new(format[1..-2])
|
250
|
-
if regexp.named_captures.empty?
|
251
|
-
raise "No named captures"
|
252
|
-
end
|
253
|
-
rescue
|
254
|
-
raise Fluent::ConfigError, "Invalid regexp '#{format[1..-2]}': #{$!}"
|
255
|
-
end
|
256
|
-
@parser = RegexpParser.new(regexp)
|
257
|
-
|
258
|
-
else
|
259
|
-
# built-in template
|
260
|
-
factory = TEMPLATE_FACTORIES[format]
|
261
|
-
unless factory
|
262
|
-
raise Fluent::ConfigError, "Unknown format template '#{format}'"
|
263
|
-
end
|
264
|
-
@parser = factory.call
|
265
|
-
|
266
|
-
end
|
267
|
-
|
268
|
-
@parser.log = @log
|
269
|
-
|
270
|
-
if @parser.respond_to?(:configure)
|
271
|
-
@parser.configure(conf)
|
272
|
-
end
|
273
|
-
|
274
|
-
return true
|
275
|
-
end
|
276
|
-
|
277
|
-
def parse(text)
|
278
|
-
return @parser.call(text)
|
279
|
-
end
|
280
|
-
end
|