fluent-plugin-parser 0.3.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -24
- data/fluent-plugin-parser.gemspec +2 -2
- data/lib/fluent/plugin/out_parser.rb +45 -30
- data/test/custom_parser.rb +39 -0
- data/test/plugin/test_out_parser.rb +37 -25
- data/test/plugin/test_out_parser_for_parsers.rb +285 -0
- metadata +8 -5
- data/lib/fluent/plugin/fixed_parser.rb +0 -280
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1cfb579022231c0bf26eb16a1156a7e9e5aa24b8
|
4
|
+
data.tar.gz: 6b096b93cce0ddf6995e92058e44a46c0ac59b6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9f59e117075d751a7a734b8e4ad15e9ef1e1a0bce142285c4f6e7b534d235009ee207d0918219ec5a3fbfdcb928280abc893957587ff5d8059d519cf0f016469
|
7
|
+
data.tar.gz: e171adea68763ffd821fc34d7ef27298122c2155e8238d461aa563e3cf3395a97c8b18ae5a125e9368222d3c9fd08ac9f1e63bea67063ad6d49f194f00065e9e
|
data/README.md
CHANGED
@@ -34,6 +34,9 @@ Of course, you can use predefined format 'apache' and 'syslog':
|
|
34
34
|
key_name message
|
35
35
|
</match>
|
36
36
|
|
37
|
+
`fluent-plugin-parser` uses parser plugins of Fluentd (and your own customized parser plugin).
|
38
|
+
See the documentation page for more details: http://docs.fluentd.org/articles/parser-plugin-overview
|
39
|
+
|
37
40
|
If you want original attribute-data pair in re-emitted message, specify 'reserve_data':
|
38
41
|
|
39
42
|
<match raw.apache.*>
|
@@ -44,30 +47,6 @@ If you want original attribute-data pair in re-emitted message, specify 'reserve
|
|
44
47
|
reserve_data yes
|
45
48
|
</match>
|
46
49
|
|
47
|
-
Formats 'json', 'csv' and 'tsv' are also supported:
|
48
|
-
|
49
|
-
<match raw.sales.*>
|
50
|
-
type parser
|
51
|
-
tag sales
|
52
|
-
format json
|
53
|
-
key_name sales
|
54
|
-
</match>
|
55
|
-
|
56
|
-
Format 'ltsv'(Labeled-TSV (Tab separated values)) is also supported:
|
57
|
-
|
58
|
-
<match raw.sales.*>
|
59
|
-
type parser
|
60
|
-
tag sales
|
61
|
-
format ltsv
|
62
|
-
key_name sales
|
63
|
-
</match>
|
64
|
-
|
65
|
-
'LTSV' is a format like the one below; unlike JSON, it is easy to write with a simple formatter (e.g. Apache's LogFormat):
|
66
|
-
|
67
|
-
KEY1:VALUE1 [TAB] KEY2:VALUE2 [TAB] ...
|
68
|
-
|
69
|
-
About LTSV, see: http://ltsv.org/
|
70
|
-
|
71
50
|
If you want to suppress the 'pattern not match' log, specify 'suppress\_parse\_error\_log true' in the configuration.
|
72
51
|
The default value is false.
|
73
52
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |gem|
|
3
3
|
gem.name = "fluent-plugin-parser"
|
4
|
-
gem.version = "0.
|
4
|
+
gem.version = "0.4.0"
|
5
5
|
gem.authors = ["TAGOMORI Satoshi"]
|
6
6
|
gem.email = ["tagomoris@gmail.com"]
|
7
7
|
gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
|
@@ -15,5 +15,5 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.require_paths = ["lib"]
|
16
16
|
|
17
17
|
gem.add_development_dependency "rake"
|
18
|
-
gem.add_runtime_dependency "fluentd"
|
18
|
+
gem.add_runtime_dependency "fluentd", ">= 0.10.54"
|
19
19
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
require 'fluent/parser'
|
2
2
|
|
3
3
|
class Fluent::ParserOutput < Fluent::Output
|
4
4
|
Fluent::Plugin.register_output('parser', self)
|
@@ -11,6 +11,8 @@ class Fluent::ParserOutput < Fluent::Output
|
|
11
11
|
config_param :inject_key_prefix, :string, :default => nil
|
12
12
|
config_param :replace_invalid_sequence, :bool, :default => false
|
13
13
|
config_param :hash_value_field, :string, :default => nil
|
14
|
+
config_param :suppress_parse_error_log, :bool, :default => false
|
15
|
+
config_param :time_parse, :bool, :default => true
|
14
16
|
|
15
17
|
attr_reader :parser
|
16
18
|
|
@@ -19,11 +21,6 @@ class Fluent::ParserOutput < Fluent::Output
|
|
19
21
|
require 'time'
|
20
22
|
end
|
21
23
|
|
22
|
-
# Define `log` method for v0.10.42 or earlier
|
23
|
-
unless method_defined?(:log)
|
24
|
-
define_method("log") { $log }
|
25
|
-
end
|
26
|
-
|
27
24
|
def configure(conf)
|
28
25
|
super
|
29
26
|
|
@@ -41,8 +38,15 @@ class Fluent::ParserOutput < Fluent::Output
|
|
41
38
|
@added_prefix_string = @add_prefix + '.'
|
42
39
|
end
|
43
40
|
|
44
|
-
@parser =
|
41
|
+
@parser = Fluent::TextParser.new
|
42
|
+
@parser.estimate_current_event = false
|
45
43
|
@parser.configure(conf)
|
44
|
+
if !@time_parse && @parser.parser.respond_to?("time_key=".to_sym)
|
45
|
+
# disable parse time
|
46
|
+
@parser.parser.time_key = nil
|
47
|
+
end
|
48
|
+
|
49
|
+
self
|
46
50
|
end
|
47
51
|
|
48
52
|
def emit(tag, es, chain)
|
@@ -64,18 +68,28 @@ class Fluent::ParserOutput < Fluent::Output
|
|
64
68
|
end
|
65
69
|
es.each do |time,record|
|
66
70
|
raw_value = record[@key_name]
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
71
|
+
begin
|
72
|
+
@parser.parse(raw_value) do |t,values|
|
73
|
+
t ||= time
|
74
|
+
handle_parsed(tag, record, t, values)
|
75
|
+
end
|
76
|
+
rescue Fluent::TextParser::ParserError => e
|
77
|
+
log.warn e.message unless @suppress_parse_error_log
|
78
|
+
rescue ArgumentError => e
|
79
|
+
if @replace_invalid_sequence
|
80
|
+
unless e.message.index("invalid byte sequence in") == 0
|
81
|
+
raise
|
82
|
+
end
|
83
|
+
replaced_string = replace_invalid_byte(raw_value)
|
84
|
+
@parser.parse(replaced_string) do |t,values|
|
85
|
+
t ||= time
|
86
|
+
handle_parsed(tag, record, t, values)
|
87
|
+
end
|
88
|
+
else
|
89
|
+
raise
|
90
|
+
end
|
91
|
+
rescue => e
|
92
|
+
log.warn "parse failed #{e.message}" unless @suppress_parse_error_log
|
79
93
|
end
|
80
94
|
end
|
81
95
|
|
@@ -84,17 +98,18 @@ class Fluent::ParserOutput < Fluent::Output
|
|
84
98
|
|
85
99
|
private
|
86
100
|
|
87
|
-
def
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
101
|
+
def handle_parsed(tag, record, t, values)
|
102
|
+
if values && @inject_key_prefix
|
103
|
+
values = Hash[values.map{|k,v| [ @inject_key_prefix + k, v ]}]
|
104
|
+
end
|
105
|
+
r = @hash_value_field ? {@hash_value_field => values} : values
|
106
|
+
if @reserve_data
|
107
|
+
r = r ? record.merge(r) : record
|
108
|
+
end
|
109
|
+
if r
|
110
|
+
Fluent::Engine.emit(tag, t, r)
|
111
|
+
else
|
112
|
+
log.warn "pattern not match #{raw_value}" unless @suppress_parse_error_log
|
98
113
|
end
|
99
114
|
end
|
100
115
|
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Fluent
|
2
|
+
class TextParser
|
3
|
+
class KVPairParser
|
4
|
+
# key<delim1>value is pair and <pair><delim2><pair> ...
|
5
|
+
# newline splits records
|
6
|
+
include Configurable
|
7
|
+
|
8
|
+
config_param :delim1, :string
|
9
|
+
config_param :delim2, :string
|
10
|
+
|
11
|
+
config_param :time_key, :string, :default => "time"
|
12
|
+
config_param :time_format, :string, :default => nil # time_format is configurable
|
13
|
+
|
14
|
+
def configure(conf)
|
15
|
+
super
|
16
|
+
@time_parser = TimeParser.new(@time_format)
|
17
|
+
end
|
18
|
+
|
19
|
+
def call(text)
|
20
|
+
text.split("\n").each do |line|
|
21
|
+
pairs = text.split(@delim2)
|
22
|
+
record = {}
|
23
|
+
time = nil
|
24
|
+
pairs.each do |pair|
|
25
|
+
k, v = pair.split(@delim1, 2)
|
26
|
+
if k == @time_key
|
27
|
+
time = @time_parser.parse(v)
|
28
|
+
else
|
29
|
+
record[k] = v
|
30
|
+
end
|
31
|
+
end
|
32
|
+
yield time, record
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
register_template("kv_pair", Proc.new { KVPairParser.new })
|
38
|
+
end
|
39
|
+
end
|
@@ -304,6 +304,26 @@ class ParserOutputTest < Test::Unit::TestCase
|
|
304
304
|
assert_equal "xxx:first\tyyy:second", first[2]['data']
|
305
305
|
assert_equal 'first', second[2]['xxx']
|
306
306
|
assert_equal 'second2', second[2]['yyy']
|
307
|
+
|
308
|
+
# convert types
|
309
|
+
d = create_driver(CONFIG_LTSV + %[
|
310
|
+
types i:integer,s:string,f:float,b:bool
|
311
|
+
], 'foo.baz.test')
|
312
|
+
time = Time.parse("2012-04-02 18:20:59").to_i
|
313
|
+
d.run do
|
314
|
+
d.emit({'data' => "i:1\ts:2\tf:3\tb:true\tx:123"}, time)
|
315
|
+
end
|
316
|
+
emits = d.emits
|
317
|
+
assert_equal 1, emits.length
|
318
|
+
|
319
|
+
first = emits[0]
|
320
|
+
assert_equal 'foo.bar.test', first[0]
|
321
|
+
assert_equal time, first[1]
|
322
|
+
assert_equal 1, first[2]['i']
|
323
|
+
assert_equal '2', first[2]['s']
|
324
|
+
assert_equal 3.0, first[2]['f']
|
325
|
+
assert_equal true, first[2]['b']
|
326
|
+
assert_equal '123', first[2]['x']
|
307
327
|
end
|
308
328
|
|
309
329
|
CONFIG_TSV = %[
|
@@ -473,8 +493,6 @@ class ParserOutputTest < Test::Unit::TestCase
|
|
473
493
|
t = Time.now.to_i
|
474
494
|
d = create_driver(CONFIG_DONT_PARSE_TIME, 'test.in')
|
475
495
|
|
476
|
-
assert_equal false, d.instance.instance_eval{ @parser }.instance_eval{ @parser }.time_parse
|
477
|
-
|
478
496
|
d.run do
|
479
497
|
d.emit({'data' => '{"time":1383190430, "f1":"v1"}'}, t)
|
480
498
|
d.emit({'data' => '{"time":"1383190430", "f1":"v1"}'}, t)
|
@@ -515,21 +533,14 @@ class ParserOutputTest < Test::Unit::TestCase
|
|
515
533
|
end
|
516
534
|
}
|
517
535
|
emits = d.emits
|
518
|
-
assert_equal
|
536
|
+
assert_equal 1, emits.length
|
519
537
|
|
520
538
|
assert_equal 'in', emits[0][0]
|
521
|
-
assert_equal
|
539
|
+
assert_equal 0, emits[0][1]
|
522
540
|
assert_equal 'v1', emits[0][2]['f1']
|
523
|
-
assert_equal
|
524
|
-
|
525
|
-
assert_equal 'in', emits[1][0]
|
526
|
-
assert_equal t, emits[1][1]
|
527
|
-
assert_equal 'v1', emits[1][2]['f1']
|
528
|
-
assert_equal 'thisisnottime', emits[1][2]['time']
|
541
|
+
assert_equal 0, emits[0][2]['time'].to_i
|
529
542
|
end
|
530
543
|
|
531
|
-
|
532
|
-
#TODO: apache2
|
533
544
|
# REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
|
534
545
|
|
535
546
|
CONFIG_NOT_REPLACE = %[
|
@@ -619,26 +630,27 @@ class ParserOutputTest < Test::Unit::TestCase
|
|
619
630
|
|
620
631
|
def swap_logger(instance)
|
621
632
|
raise "use with block" unless block_given?
|
622
|
-
parser_logger = instance.parser.log
|
623
633
|
dummy = DummyLogger.new
|
624
|
-
instance.
|
625
|
-
instance.
|
626
|
-
|
627
|
-
restore = if instance.respond_to?("log=".to_sym)
|
628
|
-
saved_logger = instance.log
|
629
|
-
instance.log = dummy
|
630
|
-
lambda{ instance.log = saved_logger; instance.parser.log = instance.parser.parser.log = parser_logger }
|
631
|
-
else
|
632
|
-
saved_logger = $log
|
633
|
-
$log = dummy
|
634
|
-
lambda{ $log = saved_logger; instance.parser.log = instance.parser.parser.log = parser_logger }
|
635
|
-
end
|
634
|
+
saved_logger = instance.log
|
635
|
+
instance.log = dummy
|
636
|
+
restore = lambda{ instance.log = saved_logger }
|
636
637
|
|
637
638
|
yield
|
638
639
|
|
639
640
|
restore.call
|
640
641
|
end
|
641
642
|
|
643
|
+
def test_parser_error_warning
|
644
|
+
d = create_driver(CONFIG_INVALID_TIME_VALUE, 'test.in')
|
645
|
+
swap_logger(d.instance) do
|
646
|
+
assert_raise(DummyLoggerWarnedException) {
|
647
|
+
d.run do
|
648
|
+
d.emit({'data' => '{"time":[], "f1":"v1"}'}, Time.now.to_i)
|
649
|
+
end
|
650
|
+
}
|
651
|
+
end
|
652
|
+
end
|
653
|
+
|
642
654
|
def test_suppress_parse_error_log
|
643
655
|
# default(disabled) 'suppress_parse_error_log' is not specify
|
644
656
|
d = create_driver(CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
|
@@ -0,0 +1,285 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require_relative '../custom_parser'
|
3
|
+
|
4
|
+
class ParserOutputParsersTest < Test::Unit::TestCase
|
5
|
+
def setup
|
6
|
+
Fluent::Test.setup
|
7
|
+
end
|
8
|
+
|
9
|
+
def create_driver(conf, tag)
|
10
|
+
Fluent::Test::OutputTestDriver.new(Fluent::ParserOutput, tag).configure(conf)
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_regexp_parser
|
14
|
+
# exists in test_out_parser
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_json_parser
|
18
|
+
# exists in test_out_parser
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_tsv_parser
|
22
|
+
# exists in test_out_parser
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_ltsv_parser
|
26
|
+
# exists in test_out_parser
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_csv_parser
|
30
|
+
# exists in test_out_parser
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_none_parser
|
34
|
+
d = create_driver(<<EOF, 'test.in')
|
35
|
+
remove_prefix test
|
36
|
+
add_prefix parsed
|
37
|
+
key_name message
|
38
|
+
format none
|
39
|
+
EOF
|
40
|
+
time = Time.parse("2014-11-05 15:59:30").to_i
|
41
|
+
d.run do
|
42
|
+
d.emit({"message" => "aaaa bbbb cccc 1"}, time)
|
43
|
+
d.emit({"message" => "aaaa bbbb cccc 2"}, time)
|
44
|
+
d.emit({"message" => "aaaa bbbb cccc 3"}, time)
|
45
|
+
d.emit({"message" => "aaaa bbbb cccc 4"}, time)
|
46
|
+
end
|
47
|
+
|
48
|
+
e = d.emits
|
49
|
+
assert_equal 4, e.length
|
50
|
+
|
51
|
+
assert_equal 'parsed.in', e[0][0]
|
52
|
+
assert_equal time, e[0][1]
|
53
|
+
assert_equal 'aaaa bbbb cccc 1', e[0][2]['message']
|
54
|
+
|
55
|
+
assert_equal 'parsed.in', e[1][0]
|
56
|
+
assert_equal time, e[1][1]
|
57
|
+
assert_equal 'aaaa bbbb cccc 2', e[1][2]['message']
|
58
|
+
|
59
|
+
assert_equal 'parsed.in', e[2][0]
|
60
|
+
assert_equal time, e[2][1]
|
61
|
+
assert_equal 'aaaa bbbb cccc 3', e[2][2]['message']
|
62
|
+
|
63
|
+
assert_equal 'parsed.in', e[3][0]
|
64
|
+
assert_equal time, e[3][1]
|
65
|
+
assert_equal 'aaaa bbbb cccc 4', e[3][2]['message']
|
66
|
+
end
|
67
|
+
|
68
|
+
def test_apache_parser
|
69
|
+
log1 = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326'
|
70
|
+
log2 = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)"'
|
71
|
+
log_time = Time.parse("2000-10-10 13:55:36 -0700").to_i
|
72
|
+
|
73
|
+
d = create_driver(<<EOF, 'test.in')
|
74
|
+
remove_prefix test
|
75
|
+
add_prefix parsed
|
76
|
+
key_name message
|
77
|
+
format apache
|
78
|
+
EOF
|
79
|
+
time = Time.parse("2014-11-05 15:59:30").to_i
|
80
|
+
d.run do
|
81
|
+
d.emit({"message" => log1}, time)
|
82
|
+
d.emit({"message" => log2}, time)
|
83
|
+
end
|
84
|
+
|
85
|
+
e = d.emits
|
86
|
+
assert_equal 2, e.length
|
87
|
+
|
88
|
+
assert_equal 'parsed.in', e[0][0]
|
89
|
+
assert_equal log_time, e[0][1]
|
90
|
+
assert_equal '127.0.0.1', e[0][2]['host']
|
91
|
+
assert_equal 'frank', e[0][2]['user']
|
92
|
+
assert_equal 'GET', e[0][2]['method']
|
93
|
+
assert_equal '/apache_pb.gif', e[0][2]['path']
|
94
|
+
assert_equal '200', e[0][2]['code']
|
95
|
+
assert_equal '2326', e[0][2]['size']
|
96
|
+
assert_nil e[0][2]['referer']
|
97
|
+
assert_nil e[0][2]['agent']
|
98
|
+
|
99
|
+
assert_equal 'parsed.in', e[1][0]
|
100
|
+
assert_equal log_time, e[1][1]
|
101
|
+
assert_equal '127.0.0.1', e[1][2]['host']
|
102
|
+
assert_equal 'frank', e[1][2]['user']
|
103
|
+
assert_equal 'GET', e[1][2]['method']
|
104
|
+
assert_equal '/apache_pb.gif', e[1][2]['path']
|
105
|
+
assert_equal '200', e[1][2]['code']
|
106
|
+
assert_equal '2326', e[1][2]['size']
|
107
|
+
assert_equal 'http://www.example.com/start.html', e[1][2]['referer']
|
108
|
+
assert_equal 'Mozilla/4.08 [en] (Win98; I ;Nav)', e[1][2]['agent']
|
109
|
+
end
|
110
|
+
|
111
|
+
def test_apache_parser_with_types
|
112
|
+
log = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)"'
|
113
|
+
log_time = Time.parse("2000-10-10 13:55:36 -0700").to_i
|
114
|
+
|
115
|
+
d = create_driver(<<EOF, 'test.in')
|
116
|
+
remove_prefix test
|
117
|
+
add_prefix parsed
|
118
|
+
key_name message
|
119
|
+
format apache
|
120
|
+
types code:integer,size:integer
|
121
|
+
EOF
|
122
|
+
time = Time.parse("2014-11-05 15:59:30").to_i
|
123
|
+
d.run do
|
124
|
+
d.emit({"message" => log}, time)
|
125
|
+
end
|
126
|
+
|
127
|
+
e = d.emits
|
128
|
+
assert_equal 1, e.length
|
129
|
+
|
130
|
+
assert_equal 'parsed.in', e[0][0]
|
131
|
+
assert_equal log_time, e[0][1]
|
132
|
+
assert_equal '127.0.0.1', e[0][2]['host']
|
133
|
+
assert_equal 'frank', e[0][2]['user']
|
134
|
+
assert_equal 'GET', e[0][2]['method']
|
135
|
+
assert_equal '/apache_pb.gif', e[0][2]['path']
|
136
|
+
assert_equal 200, e[0][2]['code']
|
137
|
+
assert_equal 2326, e[0][2]['size']
|
138
|
+
assert_equal 'http://www.example.com/start.html', e[0][2]['referer']
|
139
|
+
assert_equal 'Mozilla/4.08 [en] (Win98; I ;Nav)', e[0][2]['agent']
|
140
|
+
end
|
141
|
+
|
142
|
+
def test_syslog_parser
|
143
|
+
loglines = <<LOGS
|
144
|
+
Nov 5 16:19:48 myhost.local netbiosd[50]: name servers down?
|
145
|
+
Nov 5 16:21:20 myhost.local coreaudiod[320]: Disabled automatic stack shots because audio IO is active
|
146
|
+
Nov 5 16:21:20 myhost.local coreaudiod[320]: Enabled automatic stack shots because audio IO is inactive
|
147
|
+
LOGS
|
148
|
+
logs = loglines.split("\n").reject(&:empty?)
|
149
|
+
|
150
|
+
d = create_driver(<<EOF, 'test.in')
|
151
|
+
remove_prefix test
|
152
|
+
add_prefix parsed
|
153
|
+
key_name message
|
154
|
+
format syslog
|
155
|
+
EOF
|
156
|
+
time = Time.parse("2014-11-05 15:59:30").to_i
|
157
|
+
d.run do
|
158
|
+
d.emit({"message" => logs[0]}, time)
|
159
|
+
d.emit({"message" => logs[1]}, time)
|
160
|
+
d.emit({"message" => logs[2]}, time)
|
161
|
+
end
|
162
|
+
|
163
|
+
emits = d.emits
|
164
|
+
assert_equal 3, emits.length
|
165
|
+
|
166
|
+
e = emits[0]
|
167
|
+
assert_equal 'parsed.in', e[0]
|
168
|
+
assert_equal Time.parse("2014-11-05 16:19:48").to_i, e[1]
|
169
|
+
r = e[2]
|
170
|
+
assert_equal 'myhost.local', r['host']
|
171
|
+
assert_equal 'netbiosd', r['ident']
|
172
|
+
assert_equal '50', r['pid']
|
173
|
+
assert_equal 'name servers down?', r['message']
|
174
|
+
|
175
|
+
e = emits[1]
|
176
|
+
assert_equal 'parsed.in', e[0]
|
177
|
+
assert_equal Time.parse("2014-11-05 16:21:20").to_i, e[1]
|
178
|
+
r = e[2]
|
179
|
+
assert_equal 'myhost.local', r['host']
|
180
|
+
assert_equal 'coreaudiod', r['ident']
|
181
|
+
assert_equal '320', r['pid']
|
182
|
+
assert_equal 'Disabled automatic stack shots because audio IO is active', r['message']
|
183
|
+
|
184
|
+
e = emits[2]
|
185
|
+
assert_equal 'parsed.in', e[0]
|
186
|
+
assert_equal Time.parse("2014-11-05 16:21:20").to_i, e[1]
|
187
|
+
r = e[2]
|
188
|
+
assert_equal 'myhost.local', r['host']
|
189
|
+
assert_equal 'coreaudiod', r['ident']
|
190
|
+
assert_equal '320', r['pid']
|
191
|
+
assert_equal 'Enabled automatic stack shots because audio IO is inactive', r['message']
|
192
|
+
end
|
193
|
+
|
194
|
+
def x_test_multiline_parser
|
195
|
+
# I can't configure this format well...
|
196
|
+
log1 = <<LOG
|
197
|
+
*** 2014/11/05 16:33:01 -0700
|
198
|
+
host: myhost
|
199
|
+
port: 2048
|
200
|
+
message: first line
|
201
|
+
LOG
|
202
|
+
log2 = <<LOG
|
203
|
+
*** 2014/11/05 16:33:02 +0900
|
204
|
+
host: myhost
|
205
|
+
port: 2049
|
206
|
+
message: second line
|
207
|
+
LOG
|
208
|
+
log3 = <<LOG
|
209
|
+
*** 2014/11/05 16:43:11 +1100
|
210
|
+
LOG
|
211
|
+
d = create_driver(<<'EOF', 'test.in')
|
212
|
+
remove_prefix test
|
213
|
+
add_prefix parsed
|
214
|
+
key_name message
|
215
|
+
format multiline
|
216
|
+
time_format %Y/%m/%d %H:%M:%S %z
|
217
|
+
format_firstline /^\*\*\* /
|
218
|
+
format1 /\*\*\* (?<time>\d{4}/\d\d/\d\d/ \d\d:\d\d:\d\d [-+]\d{4})/
|
219
|
+
format2 /\s*host: (?<host>[^\s]+)/
|
220
|
+
format3 /\s*port: (?<port>\d+)/
|
221
|
+
format4 /\s*message: (?<message>[^ ]*)/
|
222
|
+
EOF
|
223
|
+
time = Time.parse("2014-11-05 15:59:30").to_i
|
224
|
+
d.run do
|
225
|
+
d.emit({"message" => log1}, time)
|
226
|
+
d.emit({"message" => log2}, time)
|
227
|
+
d.emit({"message" => log3}, time)
|
228
|
+
end
|
229
|
+
|
230
|
+
emits = d.emits
|
231
|
+
assert_equal 2, emits.length
|
232
|
+
|
233
|
+
e = emits[0]
|
234
|
+
assert_equal 'parsed.in', e[0]
|
235
|
+
assert_equal Time.parse("2014-11-05 16:33:01 -0700").to_i, e[1]
|
236
|
+
r = e[2]
|
237
|
+
assert_equal 'myhost', r['host']
|
238
|
+
assert_equal '2048', r['port']
|
239
|
+
assert_equal 'first line', r['message']
|
240
|
+
|
241
|
+
e = emits[1]
|
242
|
+
assert_equal 'parsed.in', e[0]
|
243
|
+
assert_equal Time.parse("2014-11-05 16:33:02 +0900").to_i, e[1]
|
244
|
+
r = e[2]
|
245
|
+
assert_equal 'myhost', r['host']
|
246
|
+
assert_equal '2049', r['port']
|
247
|
+
assert_equal 'second line', r['message']
|
248
|
+
end
|
249
|
+
|
250
|
+
def test_custom_parser
|
251
|
+
d = create_driver(<<'EOF', 'test.in')
|
252
|
+
remove_prefix test
|
253
|
+
add_prefix parsed
|
254
|
+
key_name message
|
255
|
+
format kv_pair
|
256
|
+
time_format %Y-%m-%d %H:%M:%S %z
|
257
|
+
delim1 :
|
258
|
+
delim2 ,
|
259
|
+
EOF
|
260
|
+
time = Time.parse("2014-11-05 15:59:30").to_i
|
261
|
+
d.run do
|
262
|
+
d.emit({"message" => "k1:v1,k2:v2,k3:1,time:2014-11-05 00:00:00 +0000"}, time)
|
263
|
+
d.emit({"message" => "k1:v1,k2:v2,k3:2"}, time) # original time is used
|
264
|
+
d.emit({"message" => "k1:v1,k2:v2,k3:3,time:2014-11-05 00:00:00"}, time) # time parse error -> not emitted
|
265
|
+
end
|
266
|
+
emits = d.emits
|
267
|
+
assert_equal 2, emits.length
|
268
|
+
|
269
|
+
e = emits[0]
|
270
|
+
assert_equal 'parsed.in', e[0]
|
271
|
+
assert_equal Time.parse("2014-11-05 00:00:00 +0000").to_i, e[1]
|
272
|
+
r = e[2]
|
273
|
+
assert_equal 'v1', r['k1']
|
274
|
+
assert_equal 'v2', r['k2']
|
275
|
+
assert_equal '1', r['k3']
|
276
|
+
|
277
|
+
e = emits[1]
|
278
|
+
assert_equal 'parsed.in', e[0]
|
279
|
+
assert_equal Time.parse("2014-11-05 15:59:30").to_i, e[1]
|
280
|
+
r = e[2]
|
281
|
+
assert_equal 'v1', r['k1']
|
282
|
+
assert_equal 'v2', r['k2']
|
283
|
+
assert_equal '2', r['k3']
|
284
|
+
end
|
285
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 0.10.54
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 0.10.54
|
41
41
|
description: fluentd plugin to parse single field, or to combine log structure into
|
42
42
|
single field
|
43
43
|
email:
|
@@ -53,12 +53,13 @@ files:
|
|
53
53
|
- README.md
|
54
54
|
- Rakefile
|
55
55
|
- fluent-plugin-parser.gemspec
|
56
|
-
- lib/fluent/plugin/fixed_parser.rb
|
57
56
|
- lib/fluent/plugin/out_deparser.rb
|
58
57
|
- lib/fluent/plugin/out_parser.rb
|
58
|
+
- test/custom_parser.rb
|
59
59
|
- test/helper.rb
|
60
60
|
- test/plugin/test_deparser.rb
|
61
61
|
- test/plugin/test_out_parser.rb
|
62
|
+
- test/plugin/test_out_parser_for_parsers.rb
|
62
63
|
homepage: https://github.com/tagomoris/fluent-plugin-parser
|
63
64
|
licenses:
|
64
65
|
- APLv2
|
@@ -84,6 +85,8 @@ signing_key:
|
|
84
85
|
specification_version: 4
|
85
86
|
summary: plugin to parse/combine fluentd log messages
|
86
87
|
test_files:
|
88
|
+
- test/custom_parser.rb
|
87
89
|
- test/helper.rb
|
88
90
|
- test/plugin/test_deparser.rb
|
89
91
|
- test/plugin/test_out_parser.rb
|
92
|
+
- test/plugin/test_out_parser_for_parsers.rb
|
@@ -1,280 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# This module is copied from fluentd/lib/fluent/parser.rb and
|
3
|
-
# fixed not to overwrite 'time' (reserve nil) when time not found in parsed string.
|
4
|
-
module FluentExt; end
|
5
|
-
|
6
|
-
class FluentExt::TextParser
|
7
|
-
class GenericParser
|
8
|
-
include Fluent::Configurable
|
9
|
-
|
10
|
-
config_param :time_key, :string, :default => 'time'
|
11
|
-
config_param :time_format, :string, :default => nil
|
12
|
-
config_param :time_parse, :bool, :default => true
|
13
|
-
|
14
|
-
attr_accessor :log
|
15
|
-
|
16
|
-
def initialize
|
17
|
-
super
|
18
|
-
|
19
|
-
@cache1_key = nil
|
20
|
-
@cache1_time = nil
|
21
|
-
@cache2_key = nil
|
22
|
-
@cache2_time = nil
|
23
|
-
|
24
|
-
@log = nil
|
25
|
-
end
|
26
|
-
|
27
|
-
def parse_time(record)
|
28
|
-
time = nil
|
29
|
-
|
30
|
-
unless @time_parse
|
31
|
-
return time, record
|
32
|
-
end
|
33
|
-
|
34
|
-
if value = record.delete(@time_key)
|
35
|
-
if @cache1_key == value
|
36
|
-
time = @cache1_time
|
37
|
-
elsif @cache2_key == value
|
38
|
-
time = @cache2_time
|
39
|
-
else
|
40
|
-
begin
|
41
|
-
time = if @time_format
|
42
|
-
Time.strptime(value, @time_format).to_i
|
43
|
-
else
|
44
|
-
Time.parse(value).to_i
|
45
|
-
end
|
46
|
-
@cache1_key = @cache2_key
|
47
|
-
@cache1_time = @cache2_time
|
48
|
-
@cache2_key = value
|
49
|
-
@cache2_time = time
|
50
|
-
rescue TypeError, ArgumentError => e
|
51
|
-
@log.warn "Failed to parse time", :key => @time_key, :value => value
|
52
|
-
record[@time_key] = value
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
return time, record
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
class RegexpParser < GenericParser
|
62
|
-
include Fluent::Configurable
|
63
|
-
|
64
|
-
config_param :suppress_parse_error_log, :bool, :default => false
|
65
|
-
|
66
|
-
def initialize(regexp, conf={})
|
67
|
-
super()
|
68
|
-
@regexp = regexp
|
69
|
-
unless conf.empty?
|
70
|
-
configure(conf)
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def call(text)
|
75
|
-
m = @regexp.match(text)
|
76
|
-
unless m
|
77
|
-
unless @suppress_parse_error_log
|
78
|
-
@log.warn "pattern not match: #{text}"
|
79
|
-
end
|
80
|
-
|
81
|
-
return nil, nil
|
82
|
-
end
|
83
|
-
|
84
|
-
record = {}
|
85
|
-
m.names.each {|name|
|
86
|
-
record[name] = m[name] if m[name]
|
87
|
-
}
|
88
|
-
parse_time(record)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
class JSONParser < GenericParser
|
93
|
-
def call(text)
|
94
|
-
record = Yajl.load(text)
|
95
|
-
return parse_time(record)
|
96
|
-
rescue Yajl::ParseError
|
97
|
-
unless @suppress_parse_error_log
|
98
|
-
@log.warn "pattern not match(json): #{text.inspect}: #{$!}"
|
99
|
-
end
|
100
|
-
|
101
|
-
return nil, nil
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
class LabeledTSVParser < GenericParser
|
106
|
-
def call(text)
|
107
|
-
record = Hash[text.split("\t").map{|p| p.split(":", 2)}]
|
108
|
-
parse_time(record)
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
|
-
class ValuesParser < GenericParser
|
113
|
-
config_param :keys, :string
|
114
|
-
|
115
|
-
def configure(conf)
|
116
|
-
super
|
117
|
-
@keys = @keys.split(",")
|
118
|
-
end
|
119
|
-
|
120
|
-
def values_map(values)
|
121
|
-
Hash[@keys.zip(values)]
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
class TSVParser < ValuesParser
|
126
|
-
config_param :delimiter, :string, :default => "\t"
|
127
|
-
|
128
|
-
def call(text)
|
129
|
-
return parse_time(values_map(text.split(@delimiter)))
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
class CSVParser < ValuesParser
|
134
|
-
def initialize
|
135
|
-
super
|
136
|
-
require 'csv'
|
137
|
-
end
|
138
|
-
|
139
|
-
def call(text)
|
140
|
-
return parse_time(values_map(CSV.parse_line(text)))
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
class ApacheParser < GenericParser
|
145
|
-
include Fluent::Configurable
|
146
|
-
|
147
|
-
REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
|
148
|
-
|
149
|
-
def initialize
|
150
|
-
super
|
151
|
-
|
152
|
-
@time_key = "time"
|
153
|
-
@time_format = "%d/%b/%Y:%H:%M:%S %z"
|
154
|
-
end
|
155
|
-
|
156
|
-
def call(text)
|
157
|
-
m = REGEXP.match(text)
|
158
|
-
unless m
|
159
|
-
unless @suppress_parse_error_log
|
160
|
-
@log.warn "pattern not match: #{text.inspect}"
|
161
|
-
end
|
162
|
-
|
163
|
-
return nil, nil
|
164
|
-
end
|
165
|
-
|
166
|
-
host = m['host']
|
167
|
-
host = (host == '-') ? nil : host
|
168
|
-
|
169
|
-
user = m['user']
|
170
|
-
user = (user == '-') ? nil : user
|
171
|
-
|
172
|
-
time = m['time']
|
173
|
-
|
174
|
-
method = m['method']
|
175
|
-
path = m['path']
|
176
|
-
|
177
|
-
code = m['code'].to_i
|
178
|
-
code = nil if code == 0
|
179
|
-
|
180
|
-
size = m['size']
|
181
|
-
size = (size == '-') ? nil : size.to_i
|
182
|
-
|
183
|
-
referer = m['referer']
|
184
|
-
referer = (referer == '-') ? nil : referer
|
185
|
-
|
186
|
-
agent = m['agent']
|
187
|
-
agent = (agent == '-') ? nil : agent
|
188
|
-
|
189
|
-
record = {
|
190
|
-
"time" => time,
|
191
|
-
"host" => host,
|
192
|
-
"user" => user,
|
193
|
-
"method" => method,
|
194
|
-
"path" => path,
|
195
|
-
"code" => code,
|
196
|
-
"size" => size,
|
197
|
-
"referer" => referer,
|
198
|
-
"agent" => agent,
|
199
|
-
}
|
200
|
-
|
201
|
-
parse_time(record)
|
202
|
-
end
|
203
|
-
end
|
204
|
-
|
205
|
-
TEMPLATE_FACTORIES = {
|
206
|
-
'apache' => Proc.new { RegexpParser.new(/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}) },
|
207
|
-
'apache2' => Proc.new { ApacheParser.new },
|
208
|
-
'nginx' => Proc.new { RegexpParser.new(/^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}) },
|
209
|
-
'syslog' => Proc.new { RegexpParser.new(/^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: *(?<message>.*)$/, {'time_format'=>"%b %d %H:%M:%S"}) },
|
210
|
-
'json' => Proc.new { JSONParser.new },
|
211
|
-
'csv' => Proc.new { CSVParser.new },
|
212
|
-
'tsv' => Proc.new { TSVParser.new },
|
213
|
-
'ltsv' => Proc.new { LabeledTSVParser.new },
|
214
|
-
}
|
215
|
-
|
216
|
-
def self.register_template(name, regexp_or_proc, time_format=nil)
|
217
|
-
|
218
|
-
factory = if regexp_or_proc.is_a?(Regexp)
|
219
|
-
regexp = regexp_or_proc
|
220
|
-
Proc.new { RegexpParser.new(regexp, {'time_format'=>time_format}) }
|
221
|
-
else
|
222
|
-
Proc.new { proc }
|
223
|
-
end
|
224
|
-
TEMPLATE_FACTORIES[name] = factory
|
225
|
-
end
|
226
|
-
|
227
|
-
attr_accessor :log
|
228
|
-
attr_reader :parser
|
229
|
-
|
230
|
-
def initialize(logger)
|
231
|
-
@log = logger
|
232
|
-
@parser = nil
|
233
|
-
end
|
234
|
-
|
235
|
-
def configure(conf, required=true)
|
236
|
-
format = conf['format']
|
237
|
-
|
238
|
-
if format == nil
|
239
|
-
if required
|
240
|
-
raise Fluent::ConfigError, "'format' parameter is required"
|
241
|
-
else
|
242
|
-
return nil
|
243
|
-
end
|
244
|
-
end
|
245
|
-
|
246
|
-
if format[0] == ?/ && format[format.length-1] == ?/
|
247
|
-
# regexp
|
248
|
-
begin
|
249
|
-
regexp = Regexp.new(format[1..-2])
|
250
|
-
if regexp.named_captures.empty?
|
251
|
-
raise "No named captures"
|
252
|
-
end
|
253
|
-
rescue
|
254
|
-
raise Fluent::ConfigError, "Invalid regexp '#{format[1..-2]}': #{$!}"
|
255
|
-
end
|
256
|
-
@parser = RegexpParser.new(regexp)
|
257
|
-
|
258
|
-
else
|
259
|
-
# built-in template
|
260
|
-
factory = TEMPLATE_FACTORIES[format]
|
261
|
-
unless factory
|
262
|
-
raise Fluent::ConfigError, "Unknown format template '#{format}'"
|
263
|
-
end
|
264
|
-
@parser = factory.call
|
265
|
-
|
266
|
-
end
|
267
|
-
|
268
|
-
@parser.log = @log
|
269
|
-
|
270
|
-
if @parser.respond_to?(:configure)
|
271
|
-
@parser.configure(conf)
|
272
|
-
end
|
273
|
-
|
274
|
-
return true
|
275
|
-
end
|
276
|
-
|
277
|
-
def parse(text)
|
278
|
-
return @parser.call(text)
|
279
|
-
end
|
280
|
-
end
|