logstash-filter-grok 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
  ---
- !binary "U0hBMQ==":
- metadata.gz: !binary |-
- ZDZkNTcxMWY2ZWE0ZGMyYTczNGYzYzRjNDdmMDI4NzE5N2IwN2Q5Mg==
- data.tar.gz: !binary |-
- YTJlZDBhZDg2ODViMzNkZjNhMjZmZDc2OTQ2MTFlYTM1MTgyOGNiNA==
+ SHA1:
+ metadata.gz: 27006dbd92d0134cef4e01124e2ed8300134aeac
+ data.tar.gz: d571e84744111378dcae55c24293e6fdcd4590d0
  SHA512:
- metadata.gz: !binary |-
- ZDJlZTRlYzBmYmFjYmRhNzA1OGE5ZTljN2ZkZGNkNzQ0ZTlhNDI0MzNmMTcx
- NjFlNjU5MDgwYjI4ZTY3MTkzMWM5ODFmMjAyZGFlMWYzZTI3YjhjMWU0OGNh
- NTFjYzg2NjRiODNmOWM1NTY0ZGJhMzRlZTdkY2QzN2ZlYjU0OTM=
- data.tar.gz: !binary |-
- ZDk4MGU3MzIzNGJkYTk3OGRhYjhiNjIyNTYzYzg1NGU2YzU3ZDQzNGNiZWMw
- NThhNTBhMDczNmQ0OTM1NTIyYTRmZjkzZTFmNTcxYzliMWVmM2JiNTc2MTVl
- YzhiMDNjM2RlNTI2MjU0OTdmZmE5NzljYmM0NTRhMjg1YmFiYjY=
+ metadata.gz: 6c8f3172eddad9922c166ee17849475408e4f298f394d0a4672353b56ead744b5c80f4f8080f1c26bb2ed9242b9f7ef4309c66ca66f402d4c22c27f933daf655
+ data.tar.gz: d2cbe6637bb17b6707578c506cdc51ac64aa7720914fb01289b06cc95f456a8d7ed9cb204c88f03b544155a153b8063b85430cc18e0dadf7c70e30778acf250e
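The digests above move from base64-wrapped YAML !binary values to plain hex strings. For reference, hex digests in the new format can be reproduced with Ruby's standard library (a minimal sketch; the paths assume an unpacked .gem and are illustrative):

require "digest"

# Print hex SHA1/SHA512 digests in the plain format the new
# checksums.yaml uses, for the archives inside an unpacked .gem.
puts Digest::SHA1.file("metadata.gz").hexdigest
puts Digest::SHA512.file("metadata.gz").hexdigest
puts Digest::SHA1.file("data.tar.gz").hexdigest
puts Digest::SHA512.file("data.tar.gz").hexdigest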
data/Gemfile CHANGED
@@ -1,3 +1,3 @@
- source 'http://rubygems.org'
- gem 'rake'
- gem 'gem_publisher'
+ source 'https://rubygems.org'
+ gemspec
+ gem "logstash", :github => "elasticsearch/logstash", :branch => "1.5"
data/Rakefile CHANGED
@@ -1,6 +1,2 @@
- @files=[]
-
- task :default do
- system("rake -T")
- end
-
+ require "logstash/devutils/rake"
+ require "logstash/devutils/rake"
data/lib/logstash/filters/grok.rb CHANGED
@@ -1,363 +1,359 @@
- # encoding: utf-8
- require "logstash/filters/base"
- require "logstash/namespace"
- require "logstash/environment"
- require "logstash/patterns/core"
- require "set"
-
- # Parse arbitrary text and structure it.
- #
- # Grok is currently the best way in logstash to parse crappy unstructured log
- # data into something structured and queryable.
- #
- # This tool is perfect for syslog logs, apache and other webserver logs, mysql
- # logs, and in general, any log format that is generally written for humans
- # and not computer consumption.
- #
- # Logstash ships with about 120 patterns by default. You can find them here:
- # <https://github.com/logstash/logstash/tree/v%VERSION%/patterns>. You can add
- # your own trivially. (See the patterns_dir setting)
- #
- # If you need help building patterns to match your logs, you will find the
- # <http://grokdebug.herokuapp.com> too quite useful!
- #
- # #### Grok Basics
- #
- # Grok works by combining text patterns into something that matches your
- # logs.
- #
- # The syntax for a grok pattern is `%{SYNTAX:SEMANTIC}`
- #
- # The `SYNTAX` is the name of the pattern that will match your text. For
- # example, "3.44" will be matched by the NUMBER pattern and "55.3.244.1" will
- # be matched by the IP pattern. The syntax is how you match.
- #
- # The `SEMANTIC` is the identifier you give to the piece of text being matched.
- # For example, "3.44" could be the duration of an event, so you could call it
- # simply 'duration'. Further, a string "55.3.244.1" might identify the 'client'
- # making a request.
- #
- # For the above example, your grok filter would look something like this:
- #
- # %{NUMBER:duration} %{IP:client}
- #
- # Optionally you can add a data type conversion to your grok pattern. By default
- # all semantics are saved as strings. If you wish to convert a semantic's data type,
- # for example change a string to an integer then suffix it with the target data type.
- # For example `%{NUMBER:num:int}` which converts the 'num' semantic from a string to an
- # integer. Currently the only supported conversions are `int` and `float`.
- #
- # #### Example
- #
- # With that idea of a syntax and semantic, we can pull out useful fields from a
- # sample log like this fictional http request log:
- #
- # 55.3.244.1 GET /index.html 15824 0.043
- #
- # The pattern for this could be:
- #
- # %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
- #
- # A more realistic example, let's read these logs from a file:
- #
- # input {
- # file {
- # path => "/var/log/http.log"
- # }
- # }
- # filter {
- # grok {
- # match => { "message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" }
- # }
- # }
- #
- # After the grok filter, the event will have a few extra fields in it:
- #
- # * client: 55.3.244.1
- # * method: GET
- # * request: /index.html
- # * bytes: 15824
- # * duration: 0.043
- #
- # #### Regular Expressions
- #
- # Grok sits on top of regular expressions, so any regular expressions are valid
- # in grok as well. The regular expression library is Oniguruma, and you can see
- # the full supported regexp syntax [on the Onigiruma
- # site](http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt).
- #
- # #### Custom Patterns
- #
- # Sometimes logstash doesn't have a pattern you need. For this, you have
- # a few options.
- #
- # First, you can use the Oniguruma syntax for 'named capture' which will
- # let you match a piece of text and save it as a field:
- #
- # (?<field_name>the pattern here)
- #
- # For example, postfix logs have a 'queue id' that is an 10 or 11-character
- # hexadecimal value. I can capture that easily like this:
- #
- # (?<queue_id>[0-9A-F]{10,11})
- #
- # Alternately, you can create a custom patterns file.
- #
- # * Create a directory called `patterns` with a file in it called `extra`
- # (the file name doesn't matter, but name it meaningfully for yourself)
- # * In that file, write the pattern you need as the pattern name, a space, then
- # the regexp for that pattern.
- #
- # For example, doing the postfix queue id example as above:
- #
- # # contents of ./patterns/postfix:
- # POSTFIX_QUEUEID [0-9A-F]{10,11}
- #
- # Then use the `patterns_dir` setting in this plugin to tell logstash where
- # your custom patterns directory is. Here's a full example with a sample log:
- #
- # Jan 1 06:25:43 mailserver14 postfix/cleanup[21403]: BEF25A72965: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
- #
- # filter {
- # grok {
- # patterns_dir => "./patterns"
- # match => { "message" => "%{SYSLOGBASE} %{POSTFIX_QUEUEID:queue_id}: %{GREEDYDATA:syslog_message}" }
- # }
- # }
- #
- # The above will match and result in the following fields:
- #
- # * timestamp: Jan 1 06:25:43
- # * logsource: mailserver14
- # * program: postfix/cleanup
- # * pid: 21403
- # * queue_id: BEF25A72965
- # * syslog_message: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
- #
- # The `timestamp`, `logsource`, `program`, and `pid` fields come from the
- # SYSLOGBASE pattern which itself is defined by other patterns.
- class LogStash::Filters::Grok < LogStash::Filters::Base
- config_name "grok"
- milestone 3
-
- # Specify a pattern to parse with. This will match the 'message' field.
+ # encoding: utf-8
+ require "logstash/filters/base"
+ require "logstash/namespace"
+ require "logstash/environment"
+ require "logstash/patterns/core"
+ require "set"
+
+ # Parse arbitrary text and structure it.
  #
- # If you want to match other fields than message, use the 'match' setting.
- # Multiple patterns is fine.
- config :pattern, :validate => :array, :deprecated => "You should use this instead: match => { \"message\" => \"your pattern here\" }"
-
- # A hash of matches of field => value
+ # Grok is currently the best way in logstash to parse crappy unstructured log
+ # data into something structured and queryable.
  #
- # For example:
+ # This tool is perfect for syslog logs, apache and other webserver logs, mysql
+ # logs, and in general, any log format that is generally written for humans
+ # and not computer consumption.
  #
- # filter {
- # grok { match => { "message" => "Duration: %{NUMBER:duration}" } }
- # }
+ # Logstash ships with about 120 patterns by default. You can find them here:
+ # <https://github.com/logstash/logstash/tree/v%VERSION%/patterns>. You can add
+ # your own trivially. (See the `patterns_dir` setting)
+ #
+ # If you need help building patterns to match your logs, you will find the
+ # <http://grokdebug.herokuapp.com> too quite useful!
+ #
+ # ==== Grok Basics
+ #
+ # Grok works by combining text patterns into something that matches your
+ # logs.
+ #
+ # The syntax for a grok pattern is `%{SYNTAX:SEMANTIC}`
+ #
+ # The `SYNTAX` is the name of the pattern that will match your text. For
+ # example, `3.44` will be matched by the `NUMBER` pattern and `55.3.244.1` will
+ # be matched by the `IP` pattern. The syntax is how you match.
  #
- # Alternatively, using the old array syntax:
+ # The `SEMANTIC` is the identifier you give to the piece of text being matched.
+ # For example, `3.44` could be the duration of an event, so you could call it
+ # simply `duration`. Further, a string `55.3.244.1` might identify the `client`
+ # making a request.
  #
+ # For the above example, your grok filter would look something like this:
+ # [source,ruby]
+ # %{NUMBER:duration} %{IP:client}
+ #
+ # Optionally you can add a data type conversion to your grok pattern. By default
+ # all semantics are saved as strings. If you wish to convert a semantic's data type,
+ # for example change a string to an integer then suffix it with the target data type.
+ # For example `%{NUMBER:num:int}` which converts the `num` semantic from a string to an
+ # integer. Currently the only supported conversions are `int` and `float`.
+ #
+ # .Examples:
+ #
+ # With that idea of a syntax and semantic, we can pull out useful fields from a
+ # sample log like this fictional http request log:
+ # [source,ruby]
+ # 55.3.244.1 GET /index.html 15824 0.043
+ #
+ # The pattern for this could be:
+ # [source,ruby]
+ # %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
+ #
+ # A more realistic example, let's read these logs from a file:
+ # [source,ruby]
+ # input {
+ # file {
+ # path => "/var/log/http.log"
+ # }
+ # }
  # filter {
- # grok { match => [ "message", "Duration: %{NUMBER:duration}" ] }
+ # grok {
+ # match => { "message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" }
+ # }
  # }
  #
- config :match, :validate => :hash, :default => {}
-
+ # After the grok filter, the event will have a few extra fields in it:
+ #
+ # * `client: 55.3.244.1`
+ # * `method: GET`
+ # * `request: /index.html`
+ # * `bytes: 15824`
+ # * `duration: 0.043`
  #
- # logstash ships by default with a bunch of patterns, so you don't
- # necessarily need to define this yourself unless you are adding additional
- # patterns.
+ # ==== Regular Expressions
  #
- # Pattern files are plain text with format:
+ # Grok sits on top of regular expressions, so any regular expressions are valid
+ # in grok as well. The regular expression library is Oniguruma, and you can see
+ # the full supported regexp syntax [on the Onigiruma
+ # site](http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt).
  #
- # NAME PATTERN
+ # ==== Custom Patterns
  #
- # For example:
+ # Sometimes logstash doesn't have a pattern you need. For this, you have
+ # a few options.
  #
- # NUMBER \d+
- config :patterns_dir, :validate => :array, :default => []
-
- # Drop if matched. Note, this feature may not stay. It is preferable to combine
- # grok + grep filters to do parsing + dropping.
- config :drop_if_match, :validate => :boolean, :default => false
-
- # Break on first match. The first successful match by grok will result in the
- # filter being finished. If you want grok to try all patterns (maybe you are
- # parsing different things), then set this to false.
- config :break_on_match, :validate => :boolean, :default => true
-
- # If true, only store named captures from grok.
- config :named_captures_only, :validate => :boolean, :default => true
-
- # If true, keep empty captures as event fields.
- config :keep_empty_captures, :validate => :boolean, :default => false
-
- # If true, make single-value fields simply that value, not an array
- # containing that one value.
- config :singles, :validate => :boolean, :default => true, :deprecated => "This behavior is the default now, you don't need to set it."
-
- # Append values to the 'tags' field when there has been no
- # successful match
- config :tag_on_failure, :validate => :array, :default => ["_grokparsefailure"]
-
- # The fields to overwrite.
+ # First, you can use the Oniguruma syntax for named capture which will
+ # let you match a piece of text and save it as a field:
+ # [source,ruby]
+ # (?<field_name>the pattern here)
  #
- # This allows you to overwrite a value in a field that already exists.
+ # For example, postfix logs have a `queue id` that is an 10 or 11-character
+ # hexadecimal value. I can capture that easily like this:
+ # [source,ruby]
+ # (?<queue_id>[0-9A-F]{10,11})
  #
- # For example, if you have a syslog line in the 'message' field, you can
- # overwrite the 'message' field with part of the match like so:
+ # Alternately, you can create a custom patterns file.
  #
+ # * Create a directory called `patterns` with a file in it called `extra`
+ # (the file name doesn't matter, but name it meaningfully for yourself)
+ # * In that file, write the pattern you need as the pattern name, a space, then
+ # the regexp for that pattern.
+ #
+ # For example, doing the postfix queue id example as above:
+ # [source,ruby]
+ # # contents of ./patterns/postfix:
+ # POSTFIX_QUEUEID [0-9A-F]{10,11}
+ #
+ # Then use the `patterns_dir` setting in this plugin to tell logstash where
+ # your custom patterns directory is. Here's a full example with a sample log:
+ # [source,ruby]
+ # Jan 1 06:25:43 mailserver14 postfix/cleanup[21403]: BEF25A72965: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
+ # [source,ruby]
  # filter {
  # grok {
- # match => { "message" => "%{SYSLOGBASE} %{DATA:message}" }
- # overwrite => [ "message" ]
+ # patterns_dir => "./patterns"
+ # match => { "message" => "%{SYSLOGBASE} %{POSTFIX_QUEUEID:queue_id}: %{GREEDYDATA:syslog_message}" }
  # }
  # }
  #
- # In this case, a line like "May 29 16:37:11 sadness logger: hello world"
- # will be parsed and 'hello world' will overwrite the original message.
- config :overwrite, :validate => :array, :default => []
-
- # Detect if we are running from a jarfile, pick the right path.
- @@patterns_path ||= Set.new
- #@@patterns_path += [LogStash::Environment.pattern_path("*")]
- @@patterns_path += [LogStash::Patterns::Core.path]
-
- public
- def initialize(params)
- super(params)
- @match["message"] ||= []
- @match["message"] += @pattern if @pattern # the config 'pattern' value (array)
- # a cache of capture name handler methods.
- @handlers = {}
- end
-
- public
- def register
- require "grok-pure" # rubygem 'jls-grok'
-
- @patternfiles = []
-
- # Have @@patterns_path show first. Last-in pattern definitions win; this
- # will let folks redefine built-in patterns at runtime.
- @patterns_dir = @@patterns_path.to_a + @patterns_dir
- @logger.info? and @logger.info("Grok patterns path", :patterns_dir => @patterns_dir)
- @patterns_dir.each do |path|
- if File.directory?(path)
- path = File.join(path, "*")
- end
-
- Dir.glob(path).each do |file|
- @logger.info? and @logger.info("Grok loading patterns from file", :path => file)
- @patternfiles << file
- end
+ # The above will match and result in the following fields:
+ #
+ # * `timestamp: Jan 1 06:25:43`
+ # * `logsource: mailserver14`
+ # * `program: postfix/cleanup`
+ # * `pid: 21403`
+ # * `queue_id: BEF25A72965`
+ # * `syslog_message: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>`
+ #
+ # The `timestamp`, `logsource`, `program`, and `pid` fields come from the
+ # `SYSLOGBASE` pattern which itself is defined by other patterns.
+ class LogStash::Filters::Grok < LogStash::Filters::Base
+ config_name "grok"
+ milestone 3
+
+ # Specify a pattern to parse with. This will match the `message` field.
+ #
+ # If you want to match other fields than message, use the `match` setting.
+ # Multiple patterns is fine.
+ config :pattern, :validate => :array, :deprecated => "You should use this instead: match => { \"message\" => \"your pattern here\" }"
+
+ # A hash of matches of field => value
+ #
+ # For example:
+ # [source,ruby]
+ # filter {
+ # grok { match => { "message" => "Duration: %{NUMBER:duration}" } }
+ # }
+ #
+ # Alternatively, using the old array syntax:
+ # [source,ruby]
+ # filter {
+ # grok { match => [ "message", "Duration: %{NUMBER:duration}" ] }
+ # }
+ #
+ config :match, :validate => :hash, :default => {}
+
+ #
+ # logstash ships by default with a bunch of patterns, so you don't
+ # necessarily need to define this yourself unless you are adding additional
+ # patterns.
+ #
+ # Pattern files are plain text with format:
+ # [source,ruby]
+ # NAME PATTERN
+ #
+ # For example:
+ # [source,ruby]
+ # NUMBER \d+
+ config :patterns_dir, :validate => :array, :default => []
+
+ # Break on first match. The first successful match by grok will result in the
+ # filter being finished. If you want grok to try all patterns (maybe you are
+ # parsing different things), then set this to false.
+ config :break_on_match, :validate => :boolean, :default => true
+
+ # If `true`, only store named captures from grok.
+ config :named_captures_only, :validate => :boolean, :default => true
+
+ # If `true`, keep empty captures as event fields.
+ config :keep_empty_captures, :validate => :boolean, :default => false
+
+ # If `true`, make single-value fields simply that value, not an array
+ # containing that one value.
+ config :singles, :validate => :boolean, :default => true, :deprecated => "This behavior is the default now, you don't need to set it."
+
+ # Append values to the `tags` field when there has been no
+ # successful match
+ config :tag_on_failure, :validate => :array, :default => ["_grokparsefailure"]
+
+ # The fields to overwrite.
+ #
+ # This allows you to overwrite a value in a field that already exists.
+ #
+ # For example, if you have a syslog line in the `message` field, you can
+ # overwrite the `message` field with part of the match like so:
+ # [source,ruby]
+ # filter {
+ # grok {
+ # match => { "message" => "%{SYSLOGBASE} %{DATA:message}" }
+ # overwrite => [ "message" ]
+ # }
+ # }
+ #
+ # In this case, a line like `May 29 16:37:11 sadness logger: hello world`
+ # will be parsed and `hello world` will overwrite the original message.
+ config :overwrite, :validate => :array, :default => []
+
+ # Detect if we are running from a jarfile, pick the right path.
+ @@patterns_path ||= Set.new
+ #@@patterns_path += [LogStash::Environment.pattern_path("*")]
+ @@patterns_path += [LogStash::Patterns::Core.path]
+
+ public
+ def initialize(params)
+ super(params)
+ @match["message"] ||= []
+ @match["message"] += @pattern if @pattern # the config 'pattern' value (array)
+ # a cache of capture name handler methods.
+ @handlers = {}
  end
-
- @patterns = Hash.new { |h,k| h[k] = [] }
-
- @logger.info? and @logger.info("Match data", :match => @match)
-
- @match.each do |field, patterns|
- patterns = [patterns] if patterns.is_a?(String)
-
- @logger.info? and @logger.info("Grok compile", :field => field, :patterns => patterns)
- patterns.each do |pattern|
- @logger.debug? and @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern)
- grok = Grok.new
- grok.logger = @logger unless @logger.nil?
- add_patterns_from_files(@patternfiles, grok)
- grok.compile(pattern, @named_captures_only)
- @patterns[field] << grok
+
+ public
+ def register
+ require "grok-pure" # rubygem 'jls-grok'
+
+ @patternfiles = []
+
+ # Have @@patterns_path show first. Last-in pattern definitions win; this
+ # will let folks redefine built-in patterns at runtime.
+ @patterns_dir = @@patterns_path.to_a + @patterns_dir
+ @logger.info? and @logger.info("Grok patterns path", :patterns_dir => @patterns_dir)
+ @patterns_dir.each do |path|
+ if File.directory?(path)
+ path = File.join(path, "*")
+ end
+
+ Dir.glob(path).each do |file|
+ @logger.info? and @logger.info("Grok loading patterns from file", :path => file)
+ @patternfiles << file
+ end
  end
- end # @match.each
- end # def register
-
- public
- def filter(event)
- return unless filter?(event)
-
- matched = false
- done = false
-
- @logger.debug? and @logger.debug("Running grok filter", :event => event);
- @patterns.each do |field, groks|
- if match(groks, field, event)
- matched = true
- break if @break_on_match
+
+ @patterns = Hash.new { |h,k| h[k] = [] }
+
+ @logger.info? and @logger.info("Match data", :match => @match)
+
+ @match.each do |field, patterns|
+ patterns = [patterns] if patterns.is_a?(String)
+
+ @logger.info? and @logger.info("Grok compile", :field => field, :patterns => patterns)
+ patterns.each do |pattern|
+ @logger.debug? and @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern)
+ grok = Grok.new
+ grok.logger = @logger unless @logger.nil?
+ add_patterns_from_files(@patternfiles, grok)
+ grok.compile(pattern, @named_captures_only)
+ @patterns[field] << grok
+ end
+ end # @match.each
+ end # def register
+
+ public
+ def filter(event)
+ return unless filter?(event)
+
+ matched = false
+ done = false
+
+ @logger.debug? and @logger.debug("Running grok filter", :event => event);
+ @patterns.each do |field, groks|
+ if match(groks, field, event)
+ matched = true
+ break if @break_on_match
+ end
+ #break if done
+ end # @patterns.each
+
+ if matched
+ filter_matched(event)
+ else
+ # Tag this event if we can't parse it. We can use this later to
+ # reparse+reindex logs if we improve the patterns given.
+ @tag_on_failure.each do |tag|
+ event["tags"] ||= []
+ event["tags"] << tag unless event["tags"].include?(tag)
+ end
  end
- #break if done
- end # @patterns.each
-
- if matched
- filter_matched(event)
- else
- # Tag this event if we can't parse it. We can use this later to
- # reparse+reindex logs if we improve the patterns given.
- @tag_on_failure.each do |tag|
- event["tags"] ||= []
- event["tags"] << tag unless event["tags"].include?(tag)
+
+ @logger.debug? and @logger.debug("Event now: ", :event => event)
+ end # def filter
+
+ private
+ def match(groks, field, event)
+ input = event[field]
+ if input.is_a?(Array)
+ success = false
+ input.each do |input|
+ success |= match_against_groks(groks, input, event)
+ end
+ return success
+ else
+ return match_against_groks(groks, input, event)
  end
+ rescue StandardError => e
+ @logger.warn("Grok regexp threw exception", :exception => e.message)
  end
-
- @logger.debug? and @logger.debug("Event now: ", :event => event)
- end # def filter
-
- private
- def match(groks, field, event)
- input = event[field]
- if input.is_a?(Array)
- success = false
- input.each do |input|
- success |= match_against_groks(groks, input, event)
+
+ private
+ def match_against_groks(groks, input, event)
+ matched = false
+ groks.each do |grok|
+ # Convert anything else to string (number, hash, etc)
+ matched = grok.match_and_capture(input.to_s) do |field, value|
+ matched = true
+ handle(field, value, event)
+ end
+ break if matched and @break_on_match
  end
- return success
- else
- return match_against_groks(groks, input, event)
+ return matched
  end
- rescue StandardError => e
- @logger.warn("Grok regexp threw exception", :exception => e.message)
- end
-
- private
- def match_against_groks(groks, input, event)
- matched = false
- groks.each do |grok|
- # Convert anything else to string (number, hash, etc)
- matched = grok.match_and_capture(input.to_s) do |field, value|
- matched = true
- handle(field, value, event)
- end
- break if matched and @break_on_match
- end
- return matched
- end
-
- private
- def handle(field, value, event)
- return if (value.nil? || (value.is_a?(String) && value.empty?)) unless @keep_empty_captures
-
- if @overwrite.include?(field)
- event[field] = value
- else
- v = event[field]
- if v.nil?
+
+ private
+ def handle(field, value, event)
+ return if (value.nil? || (value.is_a?(String) && value.empty?)) unless @keep_empty_captures
+
+ if @overwrite.include?(field)
  event[field] = value
- elsif v.is_a?(Array)
- event[field] << value
- elsif v.is_a?(String)
- # Promote to array since we aren't overwriting.
- event[field] = [v, value]
+ else
+ v = event[field]
+ if v.nil?
+ event[field] = value
+ elsif v.is_a?(Array)
+ event[field] << value
+ elsif v.is_a?(String)
+ # Promote to array since we aren't overwriting.
+ event[field] = [v, value]
+ end
  end
  end
- end
-
- private
- def add_patterns_from_files(paths, grok)
- paths.each do |path|
- if !File.exists?(path)
- raise "Grok pattern file does not exist: #{path}"
+
+ private
+ def add_patterns_from_files(paths, grok)
+ paths.each do |path|
+ if !File.exists?(path)
+ raise "Grok pattern file does not exist: #{path}"
+ end
+ grok.add_patterns_from_file(path)
  end
- grok.add_patterns_from_file(path)
- end
- end # def add_patterns_from_files
-
- end # class LogStash::Filters::Grok
+ end # def add_patterns_from_files
+
+ end # class LogStash::Filters::Grok
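For context on the code above: `register` compiles each `match` pattern into a `Grok` object, and `filter` runs event fields through `match_and_capture`. A minimal standalone sketch of that flow using the jls-grok gem directly (the two inline patterns are simplified stand-ins; the real filter loads the full pattern set from logstash-patterns-core via `add_patterns_from_files`):

require "grok-pure" # rubygem 'jls-grok', pinned to 0.11.0 by this plugin

grok = Grok.new
# Simplified stand-ins for the NUMBER and IP patterns logstash ships with.
grok.add_pattern("NUMBER", "[+-]?[0-9]+(?:\\.[0-9]+)?")
grok.add_pattern("IP", "(?:[0-9]{1,3}\\.){3}[0-9]{1,3}")
grok.compile("%{NUMBER:duration} %{IP:client}", true) # true = named captures only

# Yields each captured field/value pair, as in the filter's match loop.
grok.match_and_capture("3.44 55.3.244.1") do |field, value|
  puts "#{field} => #{value}" # duration => 3.44, client => 55.3.244.1
end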
data/logstash-filter-grok.gemspec CHANGED
@@ -1,13 +1,13 @@
  Gem::Specification.new do |s|

  s.name = 'logstash-filter-grok'
- s.version = '0.1.0'
+ s.version = '0.1.2'
  s.licenses = ['Apache License (2.0)']
  s.summary = "Parse arbitrary text and structure it."
- s.description = "Grok is currently the best way in logstash to parse crappy unstructured log data into something structured and queryable."
+ s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
  s.authors = ["Elasticsearch"]
- s.email = 'richard.pijnenburg@elasticsearch.com'
- s.homepage = "http://logstash.net/"
+ s.email = 'info@elasticsearch.com'
+ s.homepage = "http://www.elasticsearch.org/guide/en/logstash/current/index.html"
  s.require_paths = ["lib"]

  # Files
@@ -17,13 +17,15 @@ Gem::Specification.new do |s|
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
- s.metadata = { "logstash_plugin" => "true", "group" => "filter" }
+ s.metadata = { "logstash_plugin" => "true", "logstash_group" => "filter" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'

  s.add_runtime_dependency 'jls-grok', ['0.11.0']
  s.add_runtime_dependency 'logstash-patterns-core'
+ s.add_development_dependency 'logstash-devutils'

+ s.add_development_dependency 'logstash-devutils'
  end

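The metadata key rename above (`group` → `logstash_group`) matters because these gem metadata entries are the "special flag" the gemspec comment mentions: they mark the gem as a logstash plugin of a given type. They can be inspected with the standard RubyGems API (a sketch against an installed copy of the gem):

# Inspect the plugin flags on an installed gem via the RubyGems API.
spec = Gem::Specification.find_by_name("logstash-filter-grok")
puts spec.metadata["logstash_plugin"] # => "true"
puts spec.metadata["logstash_group"]  # => "filter"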
data/spec/filters/grok_spec.rb CHANGED
@@ -1,5 +1,5 @@
  # encoding: utf-8
- require "spec_helper"
+ require "logstash/devutils/rspec/spec_helper"
  require "logstash/filters/grok"

  describe LogStash::Filters::Grok do
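The spec_helper swap above points the specs at the shared rspec helpers in logstash-devutils. For context, a minimal spec in the style those helpers support (the `config`/`sample`/`insist` DSL; the pattern and field names here are illustrative, not taken from the actual spec file):

# encoding: utf-8
require "logstash/devutils/rspec/spec_helper"
require "logstash/filters/grok"

describe LogStash::Filters::Grok do
  describe "a simple duration/client match" do
    config <<-CONFIG
      filter {
        grok { match => { "message" => "%{NUMBER:duration} %{IP:client}" } }
      }
    CONFIG

    sample "3.44 55.3.244.1" do
      insist { subject["duration"] } == "3.44"
      insist { subject["client"] } == "55.3.244.1"
    end
  end
end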
metadata CHANGED
@@ -1,66 +1,93 @@
  --- !ruby/object:Gem::Specification
  name: logstash-filter-grok
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.1.2
  platform: ruby
  authors:
  - Elasticsearch
- autorequire:
+ autorequire:
  bindir: bin
  cert_chain: []
- date: 2014-11-05 00:00:00.000000000 Z
+ date: 2014-11-21 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
- name: logstash
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: 1.4.0
  - - <
  - !ruby/object:Gem::Version
  version: 2.0.0
- type: :runtime
+ name: logstash
  prerelease: false
+ type: :runtime
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: 1.4.0
  - - <
  - !ruby/object:Gem::Version
  version: 2.0.0
  - !ruby/object:Gem::Dependency
- name: jls-grok
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - '='
  - !ruby/object:Gem::Version
  version: 0.11.0
- type: :runtime
+ name: jls-grok
  prerelease: false
+ type: :runtime
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - '='
  - !ruby/object:Gem::Version
  version: 0.11.0
  - !ruby/object:Gem::Dependency
- name: logstash-patterns-core
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
+ name: logstash-patterns-core
+ prerelease: false
  type: :runtime
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ - !ruby/object:Gem::Dependency
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ name: logstash-devutils
+ prerelease: false
+ type: :development
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ - !ruby/object:Gem::Dependency
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ name: logstash-devutils
  prerelease: false
+ type: :development
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
- description: Grok is currently the best way in logstash to parse crappy unstructured
- log data into something structured and queryable.
- email: richard.pijnenburg@elasticsearch.com
+ description: This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program
+ email: info@elasticsearch.com
  executables: []
  extensions: []
  extra_rdoc_files: []
@@ -71,33 +98,31 @@ files:
  - Rakefile
  - lib/logstash/filters/grok.rb
  - logstash-filter-grok.gemspec
- - rakelib/publish.rake
- - rakelib/vendor.rake
  - spec/filters/grok_spec.rb
- homepage: http://logstash.net/
+ homepage: http://www.elasticsearch.org/guide/en/logstash/current/index.html
  licenses:
  - Apache License (2.0)
  metadata:
  logstash_plugin: 'true'
- group: filter
- post_install_message:
+ logstash_group: filter
+ post_install_message:
  rdoc_options: []
  require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubyforge_project:
- rubygems_version: 2.4.1
- signing_key:
+ rubyforge_project:
+ rubygems_version: 2.1.9
+ signing_key:
  specification_version: 4
  summary: Parse arbitrary text and structure it.
  test_files:
data/rakelib/publish.rake DELETED
@@ -1,9 +0,0 @@
- require "gem_publisher"
-
- desc "Publish gem to RubyGems.org"
- task :publish_gem do |t|
- gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first
- gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
- puts "Published #{gem}" if gem
- end
-
data/rakelib/vendor.rake DELETED
@@ -1,169 +0,0 @@
- require "net/http"
- require "uri"
- require "digest/sha1"
-
- def vendor(*args)
- return File.join("vendor", *args)
- end
-
- directory "vendor/" => ["vendor"] do |task, args|
- mkdir task.name
- end
-
- def fetch(url, sha1, output)
-
- puts "Downloading #{url}"
- actual_sha1 = download(url, output)
-
- if actual_sha1 != sha1
- fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
- end
- end # def fetch
-
- def file_fetch(url, sha1)
- filename = File.basename( URI(url).path )
- output = "vendor/#{filename}"
- task output => [ "vendor/" ] do
- begin
- actual_sha1 = file_sha1(output)
- if actual_sha1 != sha1
- fetch(url, sha1, output)
- end
- rescue Errno::ENOENT
- fetch(url, sha1, output)
- end
- end.invoke
-
- return output
- end
-
- def file_sha1(path)
- digest = Digest::SHA1.new
- fd = File.new(path, "r")
- while true
- begin
- digest << fd.sysread(16384)
- rescue EOFError
- break
- end
- end
- return digest.hexdigest
- ensure
- fd.close if fd
- end
-
- def download(url, output)
- uri = URI(url)
- digest = Digest::SHA1.new
- tmp = "#{output}.tmp"
- Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
- request = Net::HTTP::Get.new(uri.path)
- http.request(request) do |response|
- fail "HTTP fetch failed for #{url}. #{response}" if [200, 301].include?(response.code)
- size = (response["content-length"].to_i || -1).to_f
- count = 0
- File.open(tmp, "w") do |fd|
- response.read_body do |chunk|
- fd.write(chunk)
- digest << chunk
- if size > 0 && $stdout.tty?
- count += chunk.bytesize
- $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
- end
- end
- end
- $stdout.write("\r \r") if $stdout.tty?
- end
- end
-
- File.rename(tmp, output)
-
- return digest.hexdigest
- rescue SocketError => e
- puts "Failure while downloading #{url}: #{e}"
- raise
- ensure
- File.unlink(tmp) if File.exist?(tmp)
- end # def download
-
- def untar(tarball, &block)
- require "archive/tar/minitar"
- tgz = Zlib::GzipReader.new(File.open(tarball))
- # Pull out typesdb
- tar = Archive::Tar::Minitar::Input.open(tgz)
- tar.each do |entry|
- path = block.call(entry)
- next if path.nil?
- parent = File.dirname(path)
-
- mkdir_p parent unless File.directory?(parent)
-
- # Skip this file if the output file is the same size
- if entry.directory?
- mkdir path unless File.directory?(path)
- else
- entry_mode = entry.instance_eval { @mode } & 0777
- if File.exists?(path)
- stat = File.stat(path)
- # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
- # expose headers in the entry.
- entry_size = entry.instance_eval { @size }
- # If file sizes are same, skip writing.
- next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
- end
- puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
- File.open(path, "w") do |fd|
- # eof? check lets us skip empty files. Necessary because the API provided by
- # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
- # IO object. Something about empty files in this EntryStream causes
- # IO.copy_stream to throw "can't convert nil into String" on JRuby
- # TODO(sissel): File a bug about this.
- while !entry.eof?
- chunk = entry.read(16384)
- fd.write(chunk)
- end
- #IO.copy_stream(entry, fd)
- end
- File.chmod(entry_mode, path)
- end
- end
- tar.close
- File.unlink(tarball) if File.file?(tarball)
- end # def untar
-
- def ungz(file)
-
- outpath = file.gsub('.gz', '')
- tgz = Zlib::GzipReader.new(File.open(file))
- begin
- File.open(outpath, "w") do |out|
- IO::copy_stream(tgz, out)
- end
- File.unlink(file)
- rescue
- File.unlink(outpath) if File.file?(outpath)
- raise
- end
- tgz.close
- end
-
- desc "Process any vendor files required for this plugin"
- task "vendor" do |task, args|
-
- @files.each do |file|
- download = file_fetch(file['url'], file['sha1'])
- if download =~ /.tar.gz/
- prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
- untar(download) do |entry|
- if !file['files'].nil?
- next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
- out = entry.full_name.split("/").last
- end
- File.join('vendor', out)
- end
- elsif download =~ /.gz/
- ungz(download)
- end
- end
-
- end
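For reference, the deleted `vendor` task at the end iterates a `@files` list (initialized empty in the old Rakefile) whose entries the code reads as hashes with 'url', 'sha1', and an optional 'files' whitelist. A plugin that used this task would have populated the list along these lines (illustrative placeholder values, not a real artifact):

# Illustrative entry shape consumed by the deleted `vendor` task;
# the URL and checksum below are placeholders.
@files = [
  {
    'url'   => "https://example.com/grok-patterns-1.0.tar.gz",
    'sha1'  => "0123456789abcdef0123456789abcdef01234567",
    'files' => ["/patterns/grok-patterns"],
  },
]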