fluent-plugin-grok-parser 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +15 -6
- data/LICENSE +44 -0
- data/README.md +24 -2
- data/fluent-plugin-grok-parser.gemspec +2 -2
- data/lib/fluent/plugin/parser_grok.rb +41 -15
- data/test/test_grok_parser.rb +9 -18
- data/test/test_grok_parser_in_tcp.rb +89 -0
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a517f65e8155156ff9151628ddead36b679d3105
|
4
|
+
data.tar.gz: 7a034a4e0b8d28234ba30612bcf7fd205aa3fbb3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a16eb5eff0997a710e4dcb7d92ec60ebea27d151d8a85b61c7b7230f4579aeabaf3f54c42d0789f564d579c14eb25c4adbb5c9af0e2527336b9d3e6cb030527b
|
7
|
+
data.tar.gz: 65b1cef569bcf821cdee87dc322088448cc5dfd6fdff08bb4fe9330867e2094a4e848fe660b41d7302b8cc682c537d7d458286083d3566783d04b152c4db5723
|
data/Gemfile.lock
CHANGED
@@ -1,25 +1,34 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fluent-plugin-grok-parser (0.0.1)
|
5
|
-
fluentd
|
4
|
+
fluent-plugin-grok-parser (0.0.2)
|
5
|
+
fluentd (>= 0.10.58)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
10
|
cool.io (1.2.4)
|
11
|
-
fluentd (0.
|
12
|
-
cool.io (>= 1.
|
11
|
+
fluentd (0.12.1)
|
12
|
+
cool.io (>= 1.2.2, < 2.0.0)
|
13
13
|
http_parser.rb (>= 0.5.1, < 0.7.0)
|
14
14
|
json (>= 1.4.3)
|
15
|
-
msgpack (>= 0.
|
15
|
+
msgpack (>= 0.5.4, < 0.6.0)
|
16
16
|
sigdump (~> 0.2.2)
|
17
|
+
string-scrub (>= 0.0.3)
|
18
|
+
tzinfo (>= 1.0.0)
|
19
|
+
tzinfo-data (>= 1.0.0)
|
17
20
|
yajl-ruby (~> 1.0)
|
18
21
|
http_parser.rb (0.6.0)
|
19
22
|
json (1.8.1)
|
20
|
-
msgpack (0.5.
|
23
|
+
msgpack (0.5.9)
|
21
24
|
rake (10.1.1)
|
22
25
|
sigdump (0.2.2)
|
26
|
+
string-scrub (0.0.5)
|
27
|
+
thread_safe (0.3.4)
|
28
|
+
tzinfo (1.2.2)
|
29
|
+
thread_safe (~> 0.1)
|
30
|
+
tzinfo-data (1.2014.10)
|
31
|
+
tzinfo (>= 1.0.0)
|
23
32
|
yajl-ruby (1.2.1)
|
24
33
|
|
25
34
|
PLATFORMS
|
data/LICENSE
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
Copyright 2014 Kiyoto Tamura
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
|
15
|
+
For Grok's Ruby implementation found in parser_grok.rb
|
16
|
+
|
17
|
+
Copyright 2009-2013 Jordan Sissel
|
18
|
+
|
19
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
20
|
+
you may not use this file except in compliance with the License.
|
21
|
+
You may obtain a copy of the License at
|
22
|
+
|
23
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
24
|
+
|
25
|
+
Unless required by applicable law or agreed to in writing, software
|
26
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
27
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
28
|
+
See the License for the specific language governing permissions and
|
29
|
+
limitations under the License.
|
30
|
+
|
31
|
+
For Grok patterns (under patterns/):
|
32
|
+
|
33
|
+
Copyright 2009-2013 Jordan Sissel, Pete Fritchman, and contributors.
|
34
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
35
|
+
you may not use this file except in compliance with the License.
|
36
|
+
You may obtain a copy of the License at
|
37
|
+
|
38
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
39
|
+
|
40
|
+
Unless required by applicable law or agreed to in writing, software
|
41
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
42
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
43
|
+
See the License for the specific language governing permissions and
|
44
|
+
limitations under the License.
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@ This is a Fluentd plugin to enable Logstash's Grok-like parsing logic.
|
|
4
4
|
|
5
5
|
## What's Grok?
|
6
6
|
|
7
|
-
Grok is a macro to simplify and reuse regexes, originally developed by [Jordan Sissel](http://github.com/
|
7
|
+
Grok is a macro to simplify and reuse regexes, originally developed by [Jordan Sissel](http://github.com/jordansissel).
|
8
8
|
|
9
9
|
This is a partial implementation of Grok's grammer that should meet most of the needs.
|
10
10
|
|
@@ -13,12 +13,34 @@ This is a partial implementation of Grok's grammer that should meet most of the
|
|
13
13
|
You can use it wherever you used the `format` parameter to parse texts. In the following example, it
|
14
14
|
extracts the first IP address that matches in the log.
|
15
15
|
|
16
|
-
```
|
16
|
+
```aconf
|
17
17
|
<source>
|
18
18
|
type tail
|
19
19
|
path /path/to/log
|
20
20
|
format grok
|
21
21
|
grok_pattern %{IP:ip_address}
|
22
|
+
tag grokked_log
|
23
|
+
</source>
|
24
|
+
```
|
25
|
+
|
26
|
+
**If you want to try multiple grok patterns and use the first matched one**, you can use the following syntax:
|
27
|
+
|
28
|
+
```aconf
|
29
|
+
<source>
|
30
|
+
type tail
|
31
|
+
path /path/to/log
|
32
|
+
format grok
|
33
|
+
<grok>
|
34
|
+
pattern %{COMBINEDAPACHELOG}
|
35
|
+
time_format "%d/%b/%Y:%H:%M:%S %z"
|
36
|
+
</grok>
|
37
|
+
<grok>
|
38
|
+
pattern %{IP:ip_address}
|
39
|
+
</grok>
|
40
|
+
<grok>
|
41
|
+
pattern %{GREEDYDATA:message}
|
42
|
+
</grok>
|
43
|
+
tag grokked_log
|
22
44
|
</source>
|
23
45
|
```
|
24
46
|
|
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "fluent-plugin-grok-parser"
|
7
|
-
spec.version = "0.0.1"
|
7
|
+
spec.version = "0.0.2"
|
8
8
|
spec.authors = ["kiyoto"]
|
9
9
|
spec.email = ["kiyoto@treasure-data.com"]
|
10
10
|
spec.summary = %q{Fluentd plugin to suppor Logstash-inspired Grok format for parsing logs}
|
@@ -18,5 +18,5 @@ Gem::Specification.new do |spec|
|
|
18
18
|
|
19
19
|
spec.add_development_dependency "bundler"
|
20
20
|
spec.add_development_dependency "rake"
|
21
|
-
spec.add_runtime_dependency "fluentd"
|
21
|
+
spec.add_runtime_dependency "fluentd", ">=0.10.58"
|
22
22
|
end
|
@@ -2,12 +2,14 @@ module Fluent
|
|
2
2
|
class TextParser
|
3
3
|
class GrokPatternNotFoundError < Exception; end
|
4
4
|
|
5
|
-
class GrokParser
|
6
|
-
|
5
|
+
class GrokParser < Parser
|
6
|
+
Plugin.register_parser('grok', self)
|
7
7
|
config_param :time_format, :string, :default => nil
|
8
|
-
config_param :grok_pattern, :string
|
8
|
+
config_param :grok_pattern, :string, :default => nil
|
9
9
|
config_param :custom_pattern_path, :string, :default => nil
|
10
10
|
|
11
|
+
# Much of the Grok implementation is based on Jordan Sissel's jls-grok
|
12
|
+
# See https://github.com/jordansissel/ruby-grok/blob/master/lib/grok-pure.rb
|
11
13
|
PATTERN_RE = \
|
12
14
|
/%\{ # match '%{' not prefixed with '\'
|
13
15
|
(?<name> # match the pattern name
|
@@ -23,6 +25,8 @@ module Fluent
|
|
23
25
|
Dir.glob(default_pattern_dir) do |pattern_file_path|
|
24
26
|
add_patterns_from_file(pattern_file_path)
|
25
27
|
end
|
28
|
+
@default_parser = NoneParser.new
|
29
|
+
@parsers = []
|
26
30
|
end
|
27
31
|
|
28
32
|
def configure(conf={})
|
@@ -38,12 +42,13 @@ module Fluent
|
|
38
42
|
end
|
39
43
|
end
|
40
44
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
45
|
+
if @grok_pattern
|
46
|
+
@parsers = [expand_pattern_exn(@grok_pattern, conf)]
|
47
|
+
else
|
48
|
+
grok_confs = conf.elements.select {|e| e.name == 'grok'}
|
49
|
+
grok_confs.each do |grok_conf|
|
50
|
+
@parsers << expand_pattern_exn(grok_conf['pattern'], grok_conf)
|
51
|
+
end
|
47
52
|
end
|
48
53
|
end
|
49
54
|
|
@@ -55,6 +60,14 @@ module Fluent
|
|
55
60
|
end
|
56
61
|
end
|
57
62
|
|
63
|
+
def expand_pattern_exn(pattern, conf)
|
64
|
+
regexp = expand_pattern(pattern)
|
65
|
+
$log.info "Expanded the pattern #{conf['grok_pattern']} into #{regexp}"
|
66
|
+
RegexpParser.new(Regexp.new(regexp), conf)
|
67
|
+
rescue => e
|
68
|
+
$log.error e.backtrace.join("\n")
|
69
|
+
end
|
70
|
+
|
58
71
|
def expand_pattern(pattern)
|
59
72
|
# It's okay to modify in place. no need to expand it more than once.
|
60
73
|
while true
|
@@ -73,15 +86,28 @@ module Fluent
|
|
73
86
|
pattern
|
74
87
|
end
|
75
88
|
|
76
|
-
def
|
77
|
-
if
|
78
|
-
@
|
89
|
+
def parse(text)
|
90
|
+
if block_given?
|
91
|
+
@parsers.each do |parser|
|
92
|
+
parser.parse(text) do |time, record|
|
93
|
+
if time and record
|
94
|
+
yield time, record
|
95
|
+
return
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
yield @default_parser.parse(text)
|
79
100
|
else
|
80
|
-
@parser
|
101
|
+
@parsers.each do |parser|
|
102
|
+
parser.parse(text) do |time, record|
|
103
|
+
if time and record
|
104
|
+
return time, record
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
return @default_parser.parse(text)
|
81
109
|
end
|
82
110
|
end
|
83
111
|
end
|
84
|
-
|
85
|
-
TextParser.register_template('grok', Proc.new { GrokParser.new })
|
86
112
|
end
|
87
113
|
end
|
data/test/test_grok_parser.rb
CHANGED
@@ -18,7 +18,14 @@ class GrokParserTest < ::Test::Unit::TestCase
|
|
18
18
|
def internal_test_grok_pattern(grok_pattern, text, expected_time, expected_record, options = {})
|
19
19
|
parser = TextParser::GrokParser.new
|
20
20
|
parser.configure({"grok_pattern" => grok_pattern}.merge(options))
|
21
|
-
|
21
|
+
|
22
|
+
# for the old, return based API
|
23
|
+
time, record = parser.parse(text)
|
24
|
+
assert_equal(expected_time, time) if expected_time
|
25
|
+
assert_equal(expected_record, record)
|
26
|
+
|
27
|
+
# for the new API
|
28
|
+
parser.parse(text) {|time, record|
|
22
29
|
assert_equal(expected_time, time) if expected_time
|
23
30
|
assert_equal(expected_record, record)
|
24
31
|
}
|
@@ -46,7 +53,7 @@ class GrokParserTest < ::Test::Unit::TestCase
|
|
46
53
|
{"mac_address" => "DEAD.BEEF.1234", "ip_address" => "127.0.0.1"})
|
47
54
|
end
|
48
55
|
|
49
|
-
def
|
56
|
+
def test_call_for_complex_pattern
|
50
57
|
internal_test_grok_pattern('%{COMBINEDAPACHELOG}', '127.0.0.1 192.168.0.1 - [28/Feb/2013:12:00:00 +0900] "GET / HTTP/1.1" 200 777 "-" "Opera/12.0"',
|
51
58
|
str2time('28/Feb/2013:12:00:00 +0900', '%d/%b/%Y:%H:%M:%S %z'),
|
52
59
|
{
|
@@ -65,22 +72,6 @@ class GrokParserTest < ::Test::Unit::TestCase
|
|
65
72
|
)
|
66
73
|
end
|
67
74
|
|
68
|
-
def test_call_for_nagios_pattern
|
69
|
-
internal_test_grok_pattern('%{NAGIOSLOGLINE}', '[1404239939] SERVICE ALERT: servername;PING;OK;SOFT;2;PING OK - Packet loss = 16%, RTA = 72.18 ms',
|
70
|
-
str2time('1404239939', '%s'),
|
71
|
-
{
|
72
|
-
"nagios_message" => "PING OK - Packet loss = 16%, RTA = 72.18 ms",
|
73
|
-
"nagios_type" => "SERVICE ALERT",
|
74
|
-
"nagios_hostname" => "servername",
|
75
|
-
"nagios_service" => "PING",
|
76
|
-
"nagios_state" => "OK",
|
77
|
-
"nagios_statelevel" => "SOFT",
|
78
|
-
"nagios_attempt" => "2"
|
79
|
-
},
|
80
|
-
"time_format" => "%s"
|
81
|
-
)
|
82
|
-
end
|
83
|
-
|
84
75
|
def test_call_for_custom_pattern
|
85
76
|
pattern_file = File.new(File.expand_path("../my_pattern", __FILE__), "w")
|
86
77
|
pattern_file.write("MY_AWESOME_PATTERN %{GREEDYDATA:message}\n")
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'fluent/test'
|
3
|
+
|
4
|
+
class TcpInputWithGrokTest < Test::Unit::TestCase
|
5
|
+
def setup
|
6
|
+
Fluent::Test.setup
|
7
|
+
end
|
8
|
+
|
9
|
+
PORT = unused_port
|
10
|
+
BASE_CONFIG = %[
|
11
|
+
port #{PORT}
|
12
|
+
tag tcp
|
13
|
+
format grok
|
14
|
+
]
|
15
|
+
CONFIG = BASE_CONFIG + %[
|
16
|
+
bind 127.0.0.1
|
17
|
+
]
|
18
|
+
IPv6_CONFIG = BASE_CONFIG + %[
|
19
|
+
bind ::1
|
20
|
+
]
|
21
|
+
|
22
|
+
def create_driver(conf)
|
23
|
+
Fluent::Test::InputTestDriver.new(Fluent::TcpInput).configure(conf)
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_configure
|
27
|
+
configs = {'127.0.0.1' => CONFIG}
|
28
|
+
configs.merge!('::1' => IPv6_CONFIG) if ipv6_enabled?
|
29
|
+
|
30
|
+
configs.each_pair { |k, v|
|
31
|
+
d = create_driver(v)
|
32
|
+
assert_equal PORT, d.instance.port
|
33
|
+
assert_equal k, d.instance.bind
|
34
|
+
assert_equal "\n", d.instance.delimiter
|
35
|
+
}
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_grok_pattern
|
39
|
+
tests = [
|
40
|
+
{'msg' => "tcptest1\n", 'expected' => 'tcptest1'},
|
41
|
+
{'msg' => "tcptest2\n", 'expected' => 'tcptest2'},
|
42
|
+
]
|
43
|
+
block_config = %[
|
44
|
+
<grok>
|
45
|
+
pattern ${GREEDYDATA:message}
|
46
|
+
</grok>
|
47
|
+
]
|
48
|
+
|
49
|
+
internal_test_grok('grok_pattern %{GREEDYDATA:message}', tests)
|
50
|
+
internal_test_grok(block_config, tests)
|
51
|
+
end
|
52
|
+
|
53
|
+
def test_grok_multi_patterns
|
54
|
+
tests = [
|
55
|
+
{'msg' => "Current time is 2014-01-01T00:00:00+0900\n", 'expected' => '2014-01-01T00:00:00+0900'},
|
56
|
+
{'msg' => "The first word matches\n", 'expected' => 'The'}
|
57
|
+
]
|
58
|
+
block_config = %[
|
59
|
+
<grok>
|
60
|
+
pattern %{TIMESTAMP_ISO8601:message}
|
61
|
+
</grok>
|
62
|
+
<grok>
|
63
|
+
pattern %{WORD:message}
|
64
|
+
</grok>
|
65
|
+
]
|
66
|
+
internal_test_grok(block_config, tests)
|
67
|
+
end
|
68
|
+
|
69
|
+
def internal_test_grok(conf, tests)
|
70
|
+
d = create_driver(BASE_CONFIG + conf)
|
71
|
+
d.run do
|
72
|
+
tests.each {|test|
|
73
|
+
TCPSocket.open('127.0.0.1', PORT) do |s|
|
74
|
+
s.send(test['msg'], 0)
|
75
|
+
end
|
76
|
+
}
|
77
|
+
sleep 1
|
78
|
+
end
|
79
|
+
|
80
|
+
compare_test_result(d.emits, tests)
|
81
|
+
end
|
82
|
+
|
83
|
+
def compare_test_result(emits, tests)
|
84
|
+
assert_equal(2, emits.size)
|
85
|
+
emits.each_index {|i|
|
86
|
+
assert_equal(tests[i]['expected'], emits[i][2]['message'])
|
87
|
+
}
|
88
|
+
end
|
89
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-grok-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kiyoto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 0.10.58
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 0.10.58
|
55
55
|
description:
|
56
56
|
email:
|
57
57
|
- kiyoto@treasure-data.com
|
@@ -61,6 +61,7 @@ extra_rdoc_files: []
|
|
61
61
|
files:
|
62
62
|
- Gemfile
|
63
63
|
- Gemfile.lock
|
64
|
+
- LICENSE
|
64
65
|
- README.md
|
65
66
|
- Rakefile
|
66
67
|
- fluent-plugin-grok-parser.gemspec
|
@@ -68,6 +69,7 @@ files:
|
|
68
69
|
- patterns/grok-patterns
|
69
70
|
- patterns/nagios
|
70
71
|
- test/test_grok_parser.rb
|
72
|
+
- test/test_grok_parser_in_tcp.rb
|
71
73
|
homepage: https://github.com/kiyoto/fluent-plugin-grok-parser
|
72
74
|
licenses:
|
73
75
|
- Apache License, Version 2.0
|
@@ -94,3 +96,4 @@ specification_version: 4
|
|
94
96
|
summary: Fluentd plugin to suppor Logstash-inspired Grok format for parsing logs
|
95
97
|
test_files:
|
96
98
|
- test/test_grok_parser.rb
|
99
|
+
- test/test_grok_parser_in_tcp.rb
|