fluent-plugin-grok-parser 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3677fbf9646ab3f74f7e182c89d65bb8e12c7912
4
- data.tar.gz: b922f2cc49ad69e8c39f9c9b3948ad174a327f8e
3
+ metadata.gz: 939a55ebb2788709efae355b0cecb6109833c05f
4
+ data.tar.gz: 6b60181db56be7303078d767f0e40b8b980ddcd8
5
5
  SHA512:
6
- metadata.gz: 5154e8e2ee992db507da6fc4a6dde6aa57882a6bc12e228900793921d8234d1b266c572d5897a5c13fe84b187b6cb044cafffff4c74e10b8dc6836b947db6c68
7
- data.tar.gz: 34a14a13f62a00e3c6c148e9668e1a58ac77e0fce1a79f48ef94f8d7d3eb85ee95a581d130d5cdfd9790b4e14b30fe1d3efada3bd6db0f0432b03a4fce86f7bb
6
+ metadata.gz: 60a0f3d0cf432f1f0202baa92281bf19127105cf259624d670fbdb816b996a8baed8eb68d405767b3676cdb2ee886635815cdd44cba8fdd19a658e7159c1ff54
7
+ data.tar.gz: ffcb09c3c7c459daabe83de859048d7ac30b7e43fe95bf49594f9cb35b5d45d74cec862d5a61041f61c793cd9e1b93e81b53bd986fe4c94ef25cb0c5d21e32b0
data/README.md CHANGED
@@ -15,7 +15,7 @@ extracts the first IP address that matches in the log.
15
15
 
16
16
  ```aconf
17
17
  <source>
18
- type tail
18
+ @type tail
19
19
  path /path/to/log
20
20
  format grok
21
21
  grok_pattern %{IP:ip_address}
@@ -27,7 +27,7 @@ extracts the first IP address that matches in the log.
27
27
 
28
28
  ```aconf
29
29
  <source>
30
- type tail
30
+ @type tail
31
31
  path /path/to/log
32
32
  format grok
33
33
  <grok>
@@ -50,7 +50,7 @@ You can parse multiple line text.
50
50
 
51
51
  ```aconf
52
52
  <source>
53
- type tail
53
+ @type tail
54
54
  path /path/to/log
55
55
  format multiline_grok
56
56
  grok_pattern %{IP:ip_address}\n%{GREEDYDATA:message}
@@ -63,7 +63,7 @@ You can use multiple grok patterns to parse your data.
63
63
 
64
64
  ```aconf
65
65
  <source>
66
- type tail
66
+ @type tail
67
67
  path /path/to/log
68
68
  format multiline_grok
69
69
  <grok>
@@ -104,7 +104,7 @@ This is what the `custom_pattern_path` parameter is for.
104
104
 
105
105
  ```
106
106
  <source>
107
- type tail
107
+ @type tail
108
108
  path /path/to/log
109
109
  format grok
110
110
  grok_pattern %{MY_SUPER_PATTERN}
@@ -123,17 +123,15 @@ Although every parsed field has type `string` by default, you can specify other
123
123
  The syntax is
124
124
 
125
125
  ```
126
- types <field_name_1>:<type_name_1>,<field_name_2>:<type_name_2>,...
126
+ grok_pattern %{GROK_PATTERN:NAME:TYPE}...
127
127
  ```
128
128
 
129
129
  e.g.,
130
130
 
131
131
  ```
132
- types user_id:integer,paid:bool,paid_usd_amount:float
132
+ grok_pattern %{INT:foo:integer}
133
133
  ```
134
134
 
135
- As demonstrated above, "," is used to delimit field-type pairs while ":" is used to separate a field name with its intended type.
136
-
137
135
  Unspecified fields are parsed at the default string type.
138
136
 
139
137
  The list of supported types is shown below:
@@ -145,7 +143,7 @@ The list of supported types are shown below:
145
143
  * `time`
146
144
  * `array`
147
145
 
148
- For the `time` and `array` types, there is an optional third field after the type name. For the "time" type, you can specify a time format like you would in `time_format`.
146
+ For the `time` and `array` types, an optional fourth field may follow the type name. For the `time` type, this field specifies a time format, just as you would in `time_format`.
149
147
 
150
148
  For the `array` type, the fourth field specifies the delimiter (the default is ","). For example, if a field called "item\_ids" contains the value "3,4,5", `%{GREEDYDATA:item_ids:array}` parses it as ["3", "4", "5"]. Alternatively, if the value is "Adam|Alice|Bob", `%{GREEDYDATA:item_ids:array:|}` parses it as ["Adam", "Alice", "Bob"].
151
149
 
@@ -153,11 +151,10 @@ Here is a sample config using the Grok parser with `in_tail` and the `types` par
153
151
 
154
152
  ```aconf
155
153
  <source>
156
- type tail
154
+ @type tail
157
155
  path /path/to/log
158
156
  format grok
159
- grok_pattern %{INT:user_id} paid %{NUMBER:paid_amount}
160
- types user_id:integer,paid_amount:float
157
+ grok_pattern %{INT:user_id:integer} paid %{NUMBER:paid_amount:float}
161
158
  tag payment
162
159
  </source>
163
160
  ```
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "fluent-plugin-grok-parser"
7
- spec.version = "0.0.4"
7
+ spec.version = "0.1.0"
8
8
  spec.authors = ["kiyoto"]
9
9
  spec.email = ["kiyoto@treasure-data.com"]
10
10
  spec.summary = %q{Fluentd plugin to support Logstash-inspired Grok format for parsing logs}
@@ -9,7 +9,10 @@ module Fluent
9
9
  /%\{ # match '%{' not prefixed with '\'
10
10
  (?<name> # match the pattern name
11
11
  (?<pattern>[A-z0-9]+)
12
- (?::(?<subname>[@\[\]A-z0-9_:.-]+))?
12
+ (?::(?<subname>[@\[\]A-z0-9_:.-]+?)
13
+ (?::(?<type>(?:string|bool|integer|float|
14
+ time(?::.+)?|
15
+ array(?::.)?)))?)?
13
16
  )
14
17
  \}/x
15
18
 
@@ -51,12 +54,15 @@ module Fluent
51
54
  private
52
55
 
53
56
  def expand_pattern_expression(grok_pattern, conf)
54
- regexp = expand_pattern(grok_pattern)
57
+ regexp, types = expand_pattern(grok_pattern)
55
58
  $log.info "Expanded the pattern #{conf['grok_pattern']} into #{regexp}"
56
59
  options = nil
57
60
  if @multiline_mode
58
61
  options = Regexp::MULTILINE
59
62
  end
63
+ unless types.empty?
64
+ conf["types"] = types.map{|subname,type| "#{subname}:#{type}" }.join(",")
65
+ end
60
66
  TextParser::RegexpParser.new(Regexp.new(regexp, options), conf)
61
67
  rescue GrokPatternNotFoundError => e
62
68
  raise e
@@ -66,20 +72,22 @@ module Fluent
66
72
 
67
73
  def expand_pattern(pattern)
68
74
  # It's okay to modify in place. no need to expand it more than once.
75
+ type_map = {}
69
76
  while true
70
77
  m = PATTERN_RE.match(pattern)
71
78
  break unless m
72
79
  curr_pattern = @pattern_map[m["pattern"]]
73
80
  raise GrokPatternNotFoundError unless curr_pattern
74
- replacement_pattern = if m["subname"]
75
- "(?<#{m["subname"]}>#{curr_pattern})"
76
- else
77
- curr_pattern
78
- end
81
+ if m["subname"]
82
+ replacement_pattern = "(?<#{m["subname"]}>#{curr_pattern})"
83
+ type_map[m["subname"]] = m["type"] || "string"
84
+ else
85
+ replacement_pattern = curr_pattern
86
+ end
79
87
  pattern.sub!(m[0]) do |s| replacement_pattern end
80
88
  end
81
89
 
82
- pattern
90
+ [pattern, type_map]
83
91
  end
84
92
  end
85
93
  end
@@ -6,8 +6,20 @@ module Fluent
6
6
 
7
7
  class GrokParser < Parser
8
8
  Plugin.register_parser('grok', self)
9
+
10
+ # For fluentd v0.12.16 or earlier
11
+ class << self
12
+ unless method_defined?(:desc)
13
+ def desc(description)
14
+ end
15
+ end
16
+ end
17
+
18
+ desc 'The format of the time field.'
9
19
  config_param :time_format, :string, :default => nil
20
+ desc 'The pattern of grok'
10
21
  config_param :grok_pattern, :string, :default => nil
22
+ desc 'Path to the file that includes custom grok patterns'
11
23
  config_param :custom_pattern_path, :string, :default => nil
12
24
 
13
25
  def initialize
@@ -4,8 +4,9 @@ module Fluent
4
4
  class TextParser
5
5
  class MultilineGrokParser < GrokParser
6
6
  Plugin.register_parser('multiline_grok', self)
7
- config_param :multiline_start_regexp, :string, :default => nil
8
7
 
8
+ desc 'The regexp to match beginning of multiline'
9
+ config_param :multiline_start_regexp, :string, :default => nil
9
10
 
10
11
  def initialize
11
12
  super
@@ -15,15 +15,26 @@ def str2time(str_time, format = nil)
15
15
  end
16
16
 
17
17
  class GrokParserTest < ::Test::Unit::TestCase
18
- def test_call_for_timestamp
19
- internal_test_grok_pattern('%{TIMESTAMP_ISO8601:time}', 'Some stuff at 2014-01-01T00:00:00+0900',
20
- str2time('2014-01-01T00:00:00+0900'), {})
21
- internal_test_grok_pattern('%{DATESTAMP_RFC822:time}', 'Some stuff at Mon Aug 15 2005 15:52:01 UTC',
22
- str2time('Mon Aug 15 2005 15:52:01 UTC'), {})
23
- internal_test_grok_pattern('%{DATESTAMP_RFC2822:time}', 'Some stuff at Mon, 15 Aug 2005 15:52:01 +0000',
24
- str2time('Mon, 15 Aug 2005 15:52:01 +0000'), {})
25
- internal_test_grok_pattern('%{SYSLOGTIMESTAMP:time}', 'Some stuff at Aug 01 00:00:00',
26
- str2time('Aug 01 00:00:00'), {})
18
+ class Timestamp < self
19
+ def test_timestamp_iso8601
20
+ internal_test_grok_pattern('%{TIMESTAMP_ISO8601:time}', 'Some stuff at 2014-01-01T00:00:00+0900',
21
+ str2time('2014-01-01T00:00:00+0900'), {})
22
+ end
23
+
24
+ def test_datestamp_rfc822_with_zone
25
+ internal_test_grok_pattern('%{DATESTAMP_RFC822:time}', 'Some stuff at Mon Aug 15 2005 15:52:01 UTC',
26
+ str2time('Mon Aug 15 2005 15:52:01 UTC'), {})
27
+ end
28
+
29
+ def test_datestamp_rfc822_with_numeric_zone
30
+ internal_test_grok_pattern('%{DATESTAMP_RFC2822:time}', 'Some stuff at Mon, 15 Aug 2005 15:52:01 +0000',
31
+ str2time('Mon, 15 Aug 2005 15:52:01 +0000'), {})
32
+ end
33
+
34
+ def test_syslogtimestamp
35
+ internal_test_grok_pattern('%{SYSLOGTIMESTAMP:time}', 'Some stuff at Aug 01 00:00:00',
36
+ str2time('Aug 01 00:00:00'), {})
37
+ end
27
38
  end
28
39
 
29
40
  def test_call_for_grok_pattern_not_found
@@ -70,6 +81,51 @@ class GrokParserTest < ::Test::Unit::TestCase
70
81
  end
71
82
  end
72
83
 
84
+ class OptionalType < self
85
+ def test_simple
86
+ internal_test_grok_pattern('%{INT:user_id:integer} paid %{NUMBER:paid_amount:float}',
87
+ '12345 paid 6789.10', nil,
88
+ {"user_id" => 12345, "paid_amount" => 6789.1 })
89
+ end
90
+
91
+ def test_array
92
+ internal_test_grok_pattern('%{GREEDYDATA:message:array}',
93
+ 'a,b,c,d', nil,
94
+ {"message" => %w(a b c d)})
95
+ end
96
+
97
+ def test_array_with_delimiter
98
+ internal_test_grok_pattern('%{GREEDYDATA:message:array:|}',
99
+ 'a|b|c|d', nil,
100
+ {"message" => %w(a b c d)})
101
+ end
102
+
103
+ def test_timestamp_iso8601
104
+ internal_test_grok_pattern('%{TIMESTAMP_ISO8601:stamp:time}', 'Some stuff at 2014-01-01T00:00:00+0900',
105
+ nil, {"stamp" => str2time('2014-01-01T00:00:00+0900')})
106
+ end
107
+
108
+ def test_datestamp_rfc822_with_zone
109
+ internal_test_grok_pattern('%{DATESTAMP_RFC822:stamp:time}', 'Some stuff at Mon Aug 15 2005 15:52:01 UTC',
110
+ nil, {"stamp" => str2time('Mon Aug 15 2005 15:52:01 UTC')})
111
+ end
112
+
113
+ def test_datestamp_rfc822_with_numeric_zone
114
+ internal_test_grok_pattern('%{DATESTAMP_RFC2822:stamp:time}', 'Some stuff at Mon, 15 Aug 2005 15:52:01 +0000',
115
+ nil, {"stamp" => str2time('Mon, 15 Aug 2005 15:52:01 +0000')})
116
+ end
117
+
118
+ def test_syslogtimestamp
119
+ internal_test_grok_pattern('%{SYSLOGTIMESTAMP:stamp:time}', 'Some stuff at Aug 01 00:00:00',
120
+ nil, {"stamp" => str2time('Aug 01 00:00:00')})
121
+ end
122
+
123
+ def test_timestamp_with_format
124
+ internal_test_grok_pattern('%{TIMESTAMP_ISO8601:stamp:time:%Y-%m-%d %H%M}', 'Some stuff at 2014-01-01 1000',
125
+ nil, {"stamp" => str2time('2014-01-01 10:00')})
126
+ end
127
+ end
128
+
73
129
  private
74
130
 
75
131
  def internal_test_grok_pattern(grok_pattern, text, expected_time, expected_record, options = {})
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-grok-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - kiyoto
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-15 00:00:00.000000000 Z
11
+ date: 2015-12-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
108
  version: '0'
109
109
  requirements: []
110
110
  rubyforge_project:
111
- rubygems_version: 2.4.5.1
111
+ rubygems_version: 2.5.1
112
112
  signing_key:
113
113
  specification_version: 4
114
114
  summary: Fluentd plugin to support Logstash-inspired Grok format for parsing logs