fluent-plugin-grok-parser 0.0.4 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -13
- data/fluent-plugin-grok-parser.gemspec +1 -1
- data/lib/fluent/plugin/grok.rb +16 -8
- data/lib/fluent/plugin/parser_grok.rb +12 -0
- data/lib/fluent/plugin/parser_multiline_grok.rb +2 -1
- data/test/test_grok_parser.rb +65 -9
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 939a55ebb2788709efae355b0cecb6109833c05f
|
4
|
+
data.tar.gz: 6b60181db56be7303078d767f0e40b8b980ddcd8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 60a0f3d0cf432f1f0202baa92281bf19127105cf259624d670fbdb816b996a8baed8eb68d405767b3676cdb2ee886635815cdd44cba8fdd19a658e7159c1ff54
|
7
|
+
data.tar.gz: ffcb09c3c7c459daabe83de859048d7ac30b7e43fe95bf49594f9cb35b5d45d74cec862d5a61041f61c793cd9e1b93e81b53bd986fe4c94ef25cb0c5d21e32b0
|
data/README.md
CHANGED
@@ -15,7 +15,7 @@ extracts the first IP address that matches in the log.
|
|
15
15
|
|
16
16
|
```aconf
|
17
17
|
<source>
|
18
|
-
type tail
|
18
|
+
@type tail
|
19
19
|
path /path/to/log
|
20
20
|
format grok
|
21
21
|
grok_pattern %{IP:ip_address}
|
@@ -27,7 +27,7 @@ extracts the first IP address that matches in the log.
|
|
27
27
|
|
28
28
|
```aconf
|
29
29
|
<source>
|
30
|
-
type tail
|
30
|
+
@type tail
|
31
31
|
path /path/to/log
|
32
32
|
format grok
|
33
33
|
<grok>
|
@@ -50,7 +50,7 @@ You can parse multiple line text.
|
|
50
50
|
|
51
51
|
```aconf
|
52
52
|
<source>
|
53
|
-
type tail
|
53
|
+
@type tail
|
54
54
|
path /path/to/log
|
55
55
|
format multiline_grok
|
56
56
|
grok_pattern %{IP:ip_address}\n%{GREEDYDATA:message}
|
@@ -63,7 +63,7 @@ You can use multiple grok patterns to parse your data.
|
|
63
63
|
|
64
64
|
```aconf
|
65
65
|
<source>
|
66
|
-
type tail
|
66
|
+
@type tail
|
67
67
|
path /path/to/log
|
68
68
|
format multiline_grok
|
69
69
|
<grok>
|
@@ -104,7 +104,7 @@ This is what the `custom_pattern_path` parameter is for.
|
|
104
104
|
|
105
105
|
```
|
106
106
|
<source>
|
107
|
-
type tail
|
107
|
+
@type tail
|
108
108
|
path /path/to/log
|
109
109
|
format grok
|
110
110
|
grok_pattern %{MY_SUPER_PATTERN}
|
@@ -123,17 +123,15 @@ Although every parsed field has type `string` by default, you can specify other
|
|
123
123
|
The syntax is
|
124
124
|
|
125
125
|
```
|
126
|
-
|
126
|
+
grok_pattern %{GROK_PATTERN:NAME:TYPE}...
|
127
127
|
```
|
128
128
|
|
129
129
|
e.g.,
|
130
130
|
|
131
131
|
```
|
132
|
-
|
132
|
+
grok_pattern %{INT:foo:integer}
|
133
133
|
```
|
134
134
|
|
135
|
-
As demonstrated above, "," is used to delimit field-type pairs while ":" is used to separate a field name with its intended type.
|
136
|
-
|
137
135
|
Unspecified fields are parsed at the default string type.
|
138
136
|
|
139
137
|
The list of supported types are shown below:
|
@@ -145,7 +143,7 @@ The list of supported types are shown below:
|
|
145
143
|
* `time`
|
146
144
|
* `array`
|
147
145
|
|
148
|
-
For the `time` and `array` types, there is an optional
|
146
|
+
For the `time` and `array` types, there is an optional 4th field after the type name. For the "time" type, you can specify a time format like you would in `time_format`.
|
149
147
|
|
150
148
|
For the "array" type, the third field specifies the delimiter (the default is ","). For example, if a field called "item\_ids" contains the value "3,4,5", `types item_ids:array` parses it as ["3", "4", "5"]. Alternatively, if the value is "Adam|Alice|Bob", `types item_ids:array:|` parses it as ["Adam", "Alice", "Bob"].
|
151
149
|
|
@@ -153,11 +151,10 @@ Here is a sample config using the Grok parser with `in_tail` and the `types` par
|
|
153
151
|
|
154
152
|
```aconf
|
155
153
|
<source>
|
156
|
-
type tail
|
154
|
+
@type tail
|
157
155
|
path /path/to/log
|
158
156
|
format grok
|
159
|
-
grok_pattern %{INT:user_id} paid %{NUMBER:paid_amount}
|
160
|
-
types user_id:integer,paid_amount:float
|
157
|
+
grok_pattern %{INT:user_id:integer} paid %{NUMBER:paid_amount:float}
|
161
158
|
tag payment
|
162
159
|
</source>
|
163
160
|
```
|
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "fluent-plugin-grok-parser"
|
7
|
-
spec.version = "0.0.4"
|
7
|
+
spec.version = "0.1.0"
|
8
8
|
spec.authors = ["kiyoto"]
|
9
9
|
spec.email = ["kiyoto@treasure-data.com"]
|
10
10
|
spec.summary = %q{Fluentd plugin to support Logstash-inspired Grok format for parsing logs}
|
data/lib/fluent/plugin/grok.rb
CHANGED
@@ -9,7 +9,10 @@ module Fluent
|
|
9
9
|
/%\{ # match '%{' not prefixed with '\'
|
10
10
|
(?<name> # match the pattern name
|
11
11
|
(?<pattern>[A-z0-9]+)
|
12
|
-
(?::(?<subname>[@\[\]A-z0-9_:.-]
|
12
|
+
(?::(?<subname>[@\[\]A-z0-9_:.-]+?)
|
13
|
+
(?::(?<type>(?:string|bool|integer|float|
|
14
|
+
time(?::.+)?|
|
15
|
+
array(?::.)?)))?)?
|
13
16
|
)
|
14
17
|
\}/x
|
15
18
|
|
@@ -51,12 +54,15 @@ module Fluent
|
|
51
54
|
private
|
52
55
|
|
53
56
|
def expand_pattern_expression(grok_pattern, conf)
|
54
|
-
regexp = expand_pattern(grok_pattern)
|
57
|
+
regexp, types = expand_pattern(grok_pattern)
|
55
58
|
$log.info "Expanded the pattern #{conf['grok_pattern']} into #{regexp}"
|
56
59
|
options = nil
|
57
60
|
if @multiline_mode
|
58
61
|
options = Regexp::MULTILINE
|
59
62
|
end
|
63
|
+
unless types.empty?
|
64
|
+
conf["types"] = types.map{|subname,type| "#{subname}:#{type}" }.join(",")
|
65
|
+
end
|
60
66
|
TextParser::RegexpParser.new(Regexp.new(regexp, options), conf)
|
61
67
|
rescue GrokPatternNotFoundError => e
|
62
68
|
raise e
|
@@ -66,20 +72,22 @@ module Fluent
|
|
66
72
|
|
67
73
|
def expand_pattern(pattern)
|
68
74
|
# It's okay to modify in place. no need to expand it more than once.
|
75
|
+
type_map = {}
|
69
76
|
while true
|
70
77
|
m = PATTERN_RE.match(pattern)
|
71
78
|
break unless m
|
72
79
|
curr_pattern = @pattern_map[m["pattern"]]
|
73
80
|
raise GrokPatternNotFoundError unless curr_pattern
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
81
|
+
if m["subname"]
|
82
|
+
replacement_pattern = "(?<#{m["subname"]}>#{curr_pattern})"
|
83
|
+
type_map[m["subname"]] = m["type"] || "string"
|
84
|
+
else
|
85
|
+
replacement_pattern = curr_pattern
|
86
|
+
end
|
79
87
|
pattern.sub!(m[0]) do |s| replacement_pattern end
|
80
88
|
end
|
81
89
|
|
82
|
-
pattern
|
90
|
+
[pattern, type_map]
|
83
91
|
end
|
84
92
|
end
|
85
93
|
end
|
@@ -6,8 +6,20 @@ module Fluent
|
|
6
6
|
|
7
7
|
class GrokParser < Parser
|
8
8
|
Plugin.register_parser('grok', self)
|
9
|
+
|
10
|
+
# For fluentd v0.12.16 or earlier
|
11
|
+
class << self
|
12
|
+
unless method_defined?(:desc)
|
13
|
+
def desc(description)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
desc 'The format of the time field.'
|
9
19
|
config_param :time_format, :string, :default => nil
|
20
|
+
desc 'The pattern of grok'
|
10
21
|
config_param :grok_pattern, :string, :default => nil
|
22
|
+
desc 'Path to the file that includes custom grok patterns'
|
11
23
|
config_param :custom_pattern_path, :string, :default => nil
|
12
24
|
|
13
25
|
def initialize
|
@@ -4,8 +4,9 @@ module Fluent
|
|
4
4
|
class TextParser
|
5
5
|
class MultilineGrokParser < GrokParser
|
6
6
|
Plugin.register_parser('multiline_grok', self)
|
7
|
-
config_param :multiline_start_regexp, :string, :default => nil
|
8
7
|
|
8
|
+
desc 'The regexp to match beginning of multiline'
|
9
|
+
config_param :multiline_start_regexp, :string, :default => nil
|
9
10
|
|
10
11
|
def initialize
|
11
12
|
super
|
data/test/test_grok_parser.rb
CHANGED
@@ -15,15 +15,26 @@ def str2time(str_time, format = nil)
|
|
15
15
|
end
|
16
16
|
|
17
17
|
class GrokParserTest < ::Test::Unit::TestCase
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
18
|
+
class Timestamp < self
|
19
|
+
def test_timestamp_iso8601
|
20
|
+
internal_test_grok_pattern('%{TIMESTAMP_ISO8601:time}', 'Some stuff at 2014-01-01T00:00:00+0900',
|
21
|
+
str2time('2014-01-01T00:00:00+0900'), {})
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_datestamp_rfc822_with_zone
|
25
|
+
internal_test_grok_pattern('%{DATESTAMP_RFC822:time}', 'Some stuff at Mon Aug 15 2005 15:52:01 UTC',
|
26
|
+
str2time('Mon Aug 15 2005 15:52:01 UTC'), {})
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_datestamp_rfc822_with_numeric_zone
|
30
|
+
internal_test_grok_pattern('%{DATESTAMP_RFC2822:time}', 'Some stuff at Mon, 15 Aug 2005 15:52:01 +0000',
|
31
|
+
str2time('Mon, 15 Aug 2005 15:52:01 +0000'), {})
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_syslogtimestamp
|
35
|
+
internal_test_grok_pattern('%{SYSLOGTIMESTAMP:time}', 'Some stuff at Aug 01 00:00:00',
|
36
|
+
str2time('Aug 01 00:00:00'), {})
|
37
|
+
end
|
27
38
|
end
|
28
39
|
|
29
40
|
def test_call_for_grok_pattern_not_found
|
@@ -70,6 +81,51 @@ class GrokParserTest < ::Test::Unit::TestCase
|
|
70
81
|
end
|
71
82
|
end
|
72
83
|
|
84
|
+
class OptionalType < self
|
85
|
+
def test_simple
|
86
|
+
internal_test_grok_pattern('%{INT:user_id:integer} paid %{NUMBER:paid_amount:float}',
|
87
|
+
'12345 paid 6789.10', nil,
|
88
|
+
{"user_id" => 12345, "paid_amount" => 6789.1 })
|
89
|
+
end
|
90
|
+
|
91
|
+
def test_array
|
92
|
+
internal_test_grok_pattern('%{GREEDYDATA:message:array}',
|
93
|
+
'a,b,c,d', nil,
|
94
|
+
{"message" => %w(a b c d)})
|
95
|
+
end
|
96
|
+
|
97
|
+
def test_array_with_delimiter
|
98
|
+
internal_test_grok_pattern('%{GREEDYDATA:message:array:|}',
|
99
|
+
'a|b|c|d', nil,
|
100
|
+
{"message" => %w(a b c d)})
|
101
|
+
end
|
102
|
+
|
103
|
+
def test_timestamp_iso8601
|
104
|
+
internal_test_grok_pattern('%{TIMESTAMP_ISO8601:stamp:time}', 'Some stuff at 2014-01-01T00:00:00+0900',
|
105
|
+
nil, {"stamp" => str2time('2014-01-01T00:00:00+0900')})
|
106
|
+
end
|
107
|
+
|
108
|
+
def test_datestamp_rfc822_with_zone
|
109
|
+
internal_test_grok_pattern('%{DATESTAMP_RFC822:stamp:time}', 'Some stuff at Mon Aug 15 2005 15:52:01 UTC',
|
110
|
+
nil, {"stamp" => str2time('Mon Aug 15 2005 15:52:01 UTC')})
|
111
|
+
end
|
112
|
+
|
113
|
+
def test_datestamp_rfc822_with_numeric_zone
|
114
|
+
internal_test_grok_pattern('%{DATESTAMP_RFC2822:stamp:time}', 'Some stuff at Mon, 15 Aug 2005 15:52:01 +0000',
|
115
|
+
nil, {"stamp" => str2time('Mon, 15 Aug 2005 15:52:01 +0000')})
|
116
|
+
end
|
117
|
+
|
118
|
+
def test_syslogtimestamp
|
119
|
+
internal_test_grok_pattern('%{SYSLOGTIMESTAMP:stamp:time}', 'Some stuff at Aug 01 00:00:00',
|
120
|
+
nil, {"stamp" => str2time('Aug 01 00:00:00')})
|
121
|
+
end
|
122
|
+
|
123
|
+
def test_timestamp_with_format
|
124
|
+
internal_test_grok_pattern('%{TIMESTAMP_ISO8601:stamp:time:%Y-%m-%d %H%M}', 'Some stuff at 2014-01-01 1000',
|
125
|
+
nil, {"stamp" => str2time('2014-01-01 10:00')})
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
73
129
|
private
|
74
130
|
|
75
131
|
def internal_test_grok_pattern(grok_pattern, text, expected_time, expected_record, options = {})
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-grok-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kiyoto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-12-
|
11
|
+
date: 2015-12-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
108
|
version: '0'
|
109
109
|
requirements: []
|
110
110
|
rubyforge_project:
|
111
|
-
rubygems_version: 2.
|
111
|
+
rubygems_version: 2.5.1
|
112
112
|
signing_key:
|
113
113
|
specification_version: 4
|
114
114
|
summary: Fluentd plugin to support Logstash-inspired Grok format for parsing logs
|