logstash-filter-grok 0.1.0
- checksums.yaml +15 -0
- data/.gitignore +4 -0
- data/Gemfile +3 -0
- data/LICENSE +13 -0
- data/Rakefile +6 -0
- data/lib/logstash/filters/grok.rb +363 -0
- data/logstash-filter-grok.gemspec +29 -0
- data/rakelib/publish.rake +9 -0
- data/rakelib/vendor.rake +169 -0
- data/spec/filters/grok_spec.rb +648 -0
- metadata +104 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
+---
+!binary "U0hBMQ==":
+  metadata.gz: !binary |-
+    ZDZkNTcxMWY2ZWE0ZGMyYTczNGYzYzRjNDdmMDI4NzE5N2IwN2Q5Mg==
+  data.tar.gz: !binary |-
+    YTJlZDBhZDg2ODViMzNkZjNhMjZmZDc2OTQ2MTFlYTM1MTgyOGNiNA==
+SHA512:
+  metadata.gz: !binary |-
+    ZDJlZTRlYzBmYmFjYmRhNzA1OGE5ZTljN2ZkZGNkNzQ0ZTlhNDI0MzNmMTcx
+    NjFlNjU5MDgwYjI4ZTY3MTkzMWM5ODFmMjAyZGFlMWYzZTI3YjhjMWU0OGNh
+    NTFjYzg2NjRiODNmOWM1NTY0ZGJhMzRlZTdkY2QzN2ZlYjU0OTM=
+  data.tar.gz: !binary |-
+    ZDk4MGU3MzIzNGJkYTk3OGRhYjhiNjIyNTYzYzg1NGU2YzU3ZDQzNGNiZWMw
+    NThhNTBhMDczNmQ0OTM1NTIyYTRmZjkzZTFmNTcxYzliMWVmM2JiNTc2MTVl
+    YzhiMDNjM2RlNTI2MjU0OTdmZmE5NzljYmM0NTRhMjg1YmFiYjY=
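
The !binary values above are Base64-encoded hex digests of the two archives inside the .gem file. A minimal verification sketch, assuming a local copy of the extracted archive (the file path below is illustrative):

# Verify data.tar.gz against the SHA1 recorded above (sketch, not part of the gem).
require "digest"
require "base64"

expected = Base64.decode64("YTJlZDBhZDg2ODViMzNkZjNhMjZmZDc2OTQ2MTFlYTM1MTgyOGNiNA==")
actual   = Digest::SHA1.hexdigest(File.binread("data.tar.gz"))  # assumed path
puts(actual == expected ? "data.tar.gz OK" : "data.tar.gz MISMATCH")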
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
+Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
data/Rakefile
ADDED
data/lib/logstash/filters/grok.rb
ADDED
@@ -0,0 +1,363 @@
+# encoding: utf-8
+require "logstash/filters/base"
+require "logstash/namespace"
+require "logstash/environment"
+require "logstash/patterns/core"
+require "set"
+
+# Parse arbitrary text and structure it.
+#
+# Grok is currently the best way in logstash to parse crappy unstructured log
+# data into something structured and queryable.
+#
+# This tool is perfect for syslog logs, apache and other webserver logs, mysql
+# logs, and in general, any log format that is generally written for humans
+# and not computer consumption.
+#
+# Logstash ships with about 120 patterns by default. You can find them here:
+# <https://github.com/logstash/logstash/tree/v%VERSION%/patterns>. You can add
+# your own trivially. (See the patterns_dir setting)
+#
+# If you need help building patterns to match your logs, you will find the
+# <http://grokdebug.herokuapp.com> tool quite useful!
+#
+# #### Grok Basics
+#
+# Grok works by combining text patterns into something that matches your
+# logs.
+#
+# The syntax for a grok pattern is `%{SYNTAX:SEMANTIC}`
+#
+# The `SYNTAX` is the name of the pattern that will match your text. For
+# example, "3.44" will be matched by the NUMBER pattern and "55.3.244.1" will
+# be matched by the IP pattern. The syntax is how you match.
+#
+# The `SEMANTIC` is the identifier you give to the piece of text being matched.
+# For example, "3.44" could be the duration of an event, so you could call it
+# simply 'duration'. Further, a string "55.3.244.1" might identify the 'client'
+# making a request.
+#
+# For the above example, your grok filter would look something like this:
+#
+#     %{NUMBER:duration} %{IP:client}
+#
+# Optionally you can add a data type conversion to your grok pattern. By default
+# all semantics are saved as strings. If you wish to convert a semantic's data type,
+# for example change a string to an integer, then suffix it with the target data type.
+# For example `%{NUMBER:num:int}` converts the 'num' semantic from a string to an
+# integer. Currently the only supported conversions are `int` and `float`.
+#
+# #### Example
+#
+# With that idea of a syntax and semantic, we can pull out useful fields from a
+# sample log like this fictional http request log:
+#
+#     55.3.244.1 GET /index.html 15824 0.043
+#
+# The pattern for this could be:
+#
+#     %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
+#
+# A more realistic example: let's read these logs from a file:
+#
+#     input {
+#       file {
+#         path => "/var/log/http.log"
+#       }
+#     }
+#     filter {
+#       grok {
+#         match => { "message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" }
+#       }
+#     }
+#
+# After the grok filter, the event will have a few extra fields in it:
+#
+# * client: 55.3.244.1
+# * method: GET
+# * request: /index.html
+# * bytes: 15824
+# * duration: 0.043
+#
+# #### Regular Expressions
+#
+# Grok sits on top of regular expressions, so any regular expressions are valid
+# in grok as well. The regular expression library is Oniguruma, and you can see
+# the full supported regexp syntax [on the Oniguruma
+# site](http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt).
+#
+# #### Custom Patterns
+#
+# Sometimes logstash doesn't have a pattern you need. For this, you have
+# a few options.
+#
+# First, you can use the Oniguruma syntax for 'named capture' which will
+# let you match a piece of text and save it as a field:
+#
+#     (?<field_name>the pattern here)
+#
+# For example, postfix logs have a 'queue id' that is a 10- or 11-character
+# hexadecimal value. I can capture that easily like this:
+#
+#     (?<queue_id>[0-9A-F]{10,11})
+#
+# Alternately, you can create a custom patterns file.
+#
+# * Create a directory called `patterns` with a file in it called `extra`
+#   (the file name doesn't matter, but name it meaningfully for yourself)
+# * In that file, write the pattern you need as the pattern name, a space, then
+#   the regexp for that pattern.
+#
+# For example, doing the postfix queue id example as above:
+#
+#     # contents of ./patterns/postfix:
+#     POSTFIX_QUEUEID [0-9A-F]{10,11}
+#
+# Then use the `patterns_dir` setting in this plugin to tell logstash where
+# your custom patterns directory is. Here's a full example with a sample log:
+#
+#     Jan 1 06:25:43 mailserver14 postfix/cleanup[21403]: BEF25A72965: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
+#
+#     filter {
+#       grok {
+#         patterns_dir => "./patterns"
+#         match => { "message" => "%{SYSLOGBASE} %{POSTFIX_QUEUEID:queue_id}: %{GREEDYDATA:syslog_message}" }
+#       }
+#     }
+#
+# The above will match and result in the following fields:
+#
+# * timestamp: Jan 1 06:25:43
+# * logsource: mailserver14
+# * program: postfix/cleanup
+# * pid: 21403
+# * queue_id: BEF25A72965
+# * syslog_message: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
+#
+# The `timestamp`, `logsource`, `program`, and `pid` fields come from the
+# SYSLOGBASE pattern, which itself is defined by other patterns.
+class LogStash::Filters::Grok < LogStash::Filters::Base
+  config_name "grok"
+  milestone 3
+
+  # Specify a pattern to parse with. This will match the 'message' field.
+  #
+  # If you want to match other fields than message, use the 'match' setting.
+  # Multiple patterns are fine.
+  config :pattern, :validate => :array, :deprecated => "You should use this instead: match => { \"message\" => \"your pattern here\" }"
+
+  # A hash of matches of field => value
+  #
+  # For example:
+  #
+  #     filter {
+  #       grok { match => { "message" => "Duration: %{NUMBER:duration}" } }
+  #     }
+  #
+  # Alternatively, using the old array syntax:
+  #
+  #     filter {
+  #       grok { match => [ "message", "Duration: %{NUMBER:duration}" ] }
+  #     }
+  #
+  config :match, :validate => :hash, :default => {}
+
+  #
+  # logstash ships by default with a bunch of patterns, so you don't
+  # necessarily need to define this yourself unless you are adding additional
+  # patterns.
+  #
+  # Pattern files are plain text with format:
+  #
+  #     NAME PATTERN
+  #
+  # For example:
+  #
+  #     NUMBER \d+
+  config :patterns_dir, :validate => :array, :default => []
+
+  # Drop if matched. Note, this feature may not stay. It is preferable to combine
+  # grok + grep filters to do parsing + dropping.
+  config :drop_if_match, :validate => :boolean, :default => false
+
+  # Break on first match. The first successful match by grok will result in the
+  # filter being finished. If you want grok to try all patterns (maybe you are
+  # parsing different things), then set this to false.
+  config :break_on_match, :validate => :boolean, :default => true
+
+  # If true, only store named captures from grok.
+  config :named_captures_only, :validate => :boolean, :default => true
+
+  # If true, keep empty captures as event fields.
+  config :keep_empty_captures, :validate => :boolean, :default => false
+
+  # If true, make single-value fields simply that value, not an array
+  # containing that one value.
+  config :singles, :validate => :boolean, :default => true, :deprecated => "This behavior is the default now, you don't need to set it."
+
+  # Append values to the 'tags' field when there has been no
+  # successful match
+  config :tag_on_failure, :validate => :array, :default => ["_grokparsefailure"]
+
+  # The fields to overwrite.
+  #
+  # This allows you to overwrite a value in a field that already exists.
+  #
+  # For example, if you have a syslog line in the 'message' field, you can
+  # overwrite the 'message' field with part of the match like so:
+  #
+  #     filter {
+  #       grok {
+  #         match => { "message" => "%{SYSLOGBASE} %{DATA:message}" }
+  #         overwrite => [ "message" ]
+  #       }
+  #     }
+  #
+  # In this case, a line like "May 29 16:37:11 sadness logger: hello world"
+  # will be parsed and 'hello world' will overwrite the original message.
+  config :overwrite, :validate => :array, :default => []
+
+  # Detect if we are running from a jarfile, pick the right path.
+  @@patterns_path ||= Set.new
+  #@@patterns_path += [LogStash::Environment.pattern_path("*")]
+  @@patterns_path += [LogStash::Patterns::Core.path]
+
+  public
+  def initialize(params)
+    super(params)
+    @match["message"] ||= []
+    @match["message"] += @pattern if @pattern # the config 'pattern' value (array)
+    # a cache of capture name handler methods.
+    @handlers = {}
+  end
+
+  public
+  def register
+    require "grok-pure" # rubygem 'jls-grok'
+
+    @patternfiles = []
+
+    # Have @@patterns_path show first. Last-in pattern definitions win; this
+    # will let folks redefine built-in patterns at runtime.
+    @patterns_dir = @@patterns_path.to_a + @patterns_dir
+    @logger.info? and @logger.info("Grok patterns path", :patterns_dir => @patterns_dir)
+    @patterns_dir.each do |path|
+      if File.directory?(path)
+        path = File.join(path, "*")
+      end
+
+      Dir.glob(path).each do |file|
+        @logger.info? and @logger.info("Grok loading patterns from file", :path => file)
+        @patternfiles << file
+      end
+    end
+
+    @patterns = Hash.new { |h,k| h[k] = [] }
+
+    @logger.info? and @logger.info("Match data", :match => @match)
+
+    @match.each do |field, patterns|
+      patterns = [patterns] if patterns.is_a?(String)
+
+      @logger.info? and @logger.info("Grok compile", :field => field, :patterns => patterns)
+      patterns.each do |pattern|
+        @logger.debug? and @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern)
+        grok = Grok.new
+        grok.logger = @logger unless @logger.nil?
+        add_patterns_from_files(@patternfiles, grok)
+        grok.compile(pattern, @named_captures_only)
+        @patterns[field] << grok
+      end
+    end # @match.each
+  end # def register
+
+  public
+  def filter(event)
+    return unless filter?(event)
+
+    matched = false
+    done = false
+
+    @logger.debug? and @logger.debug("Running grok filter", :event => event)
+    @patterns.each do |field, groks|
+      if match(groks, field, event)
+        matched = true
+        break if @break_on_match
+      end
+      #break if done
+    end # @patterns.each
+
+    if matched
+      filter_matched(event)
+    else
+      # Tag this event if we can't parse it. We can use this later to
+      # reparse+reindex logs if we improve the patterns given.
+      @tag_on_failure.each do |tag|
+        event["tags"] ||= []
+        event["tags"] << tag unless event["tags"].include?(tag)
+      end
+    end
+
+    @logger.debug? and @logger.debug("Event now: ", :event => event)
+  end # def filter
+
+  private
+  def match(groks, field, event)
+    input = event[field]
+    if input.is_a?(Array)
+      success = false
+      input.each do |input|
+        success |= match_against_groks(groks, input, event)
+      end
+      return success
+    else
+      return match_against_groks(groks, input, event)
+    end
+  rescue StandardError => e
+    @logger.warn("Grok regexp threw exception", :exception => e.message)
+  end
+
+  private
+  def match_against_groks(groks, input, event)
+    matched = false
+    groks.each do |grok|
+      # Convert anything else to string (number, hash, etc)
+      matched = grok.match_and_capture(input.to_s) do |field, value|
+        matched = true
+        handle(field, value, event)
+      end
+      break if matched and @break_on_match
+    end
+    return matched
+  end
+
+  private
+  def handle(field, value, event)
+    return if (value.nil? || (value.is_a?(String) && value.empty?)) unless @keep_empty_captures
+
+    if @overwrite.include?(field)
+      event[field] = value
+    else
+      v = event[field]
+      if v.nil?
+        event[field] = value
+      elsif v.is_a?(Array)
+        event[field] << value
+      elsif v.is_a?(String)
+        # Promote to array since we aren't overwriting.
+        event[field] = [v, value]
+      end
+    end
+  end
+
+  private
+  def add_patterns_from_files(paths, grok)
+    paths.each do |path|
+      if !File.exists?(path)
+        raise "Grok pattern file does not exist: #{path}"
+      end
+      grok.add_patterns_from_file(path)
+    end
+  end # def add_patterns_from_files
+
+end # class LogStash::Filters::Grok
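
A minimal sketch of the underlying jls-grok API the filter above drives, using only calls the filter itself makes (the pattern file path is an assumption; it would point at the patterns shipped by logstash-patterns-core):

# Sketch: using grok-pure directly, outside logstash.
require "grok-pure"  # rubygem 'jls-grok'

grok = Grok.new
grok.add_patterns_from_file("patterns/grok-patterns")  # assumed location
grok.compile("%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}", true)

captures = {}
grok.match_and_capture("55.3.244.1 GET /index.html 15824 0.043") do |field, value|
  captures[field] = value
end
p captures  # e.g. {"client"=>"55.3.244.1", "method"=>"GET", ...}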
data/logstash-filter-grok.gemspec
ADDED
@@ -0,0 +1,29 @@
+Gem::Specification.new do |s|
+
+  s.name          = 'logstash-filter-grok'
+  s.version       = '0.1.0'
+  s.licenses      = ['Apache License (2.0)']
+  s.summary       = "Parse arbitrary text and structure it."
+  s.description   = "Grok is currently the best way in logstash to parse crappy unstructured log data into something structured and queryable."
+  s.authors       = ["Elasticsearch"]
+  s.email         = 'richard.pijnenburg@elasticsearch.com'
+  s.homepage      = "http://logstash.net/"
+  s.require_paths = ["lib"]
+
+  # Files
+  s.files = `git ls-files`.split($\)
+
+  # Tests
+  s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+  # Special flag to let us know this is actually a logstash plugin
+  s.metadata = { "logstash_plugin" => "true", "group" => "filter" }
+
+  # Gem dependencies
+  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+
+  s.add_runtime_dependency 'jls-grok', ['0.11.0']
+  s.add_runtime_dependency 'logstash-patterns-core'
+
+end
+
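
For context, one way to build the .gem from a spec like this: a sketch assuming RubyGems 2.x and a git checkout (s.files shells out to git ls-files), equivalent to running `gem build logstash-filter-grok.gemspec`:

# Sketch: build the gem programmatically from the spec above.
require "rubygems/package"

spec = Gem::Specification.load("logstash-filter-grok.gemspec")
Gem::Package.build(spec)  # writes logstash-filter-grok-0.1.0.gem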
data/rakelib/publish.rake
ADDED
@@ -0,0 +1,9 @@
+require "gem_publisher"
+
+desc "Publish gem to RubyGems.org"
+task :publish_gem do |t|
+  gem_file = Dir.glob(File.expand_path('../*.gemspec', File.dirname(__FILE__))).first
+  gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
+  puts "Published #{gem}" if gem
+end
+
data/rakelib/vendor.rake
ADDED
@@ -0,0 +1,169 @@
+require "net/http"
+require "uri"
+require "digest/sha1"
+
+def vendor(*args)
+  return File.join("vendor", *args)
+end
+
+directory "vendor/" => ["vendor"] do |task, args|
+  mkdir task.name
+end
+
+def fetch(url, sha1, output)
+
+  puts "Downloading #{url}"
+  actual_sha1 = download(url, output)
+
+  if actual_sha1 != sha1
+    fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
+  end
+end # def fetch
+
+def file_fetch(url, sha1)
+  filename = File.basename( URI(url).path )
+  output = "vendor/#{filename}"
+  task output => [ "vendor/" ] do
+    begin
+      actual_sha1 = file_sha1(output)
+      if actual_sha1 != sha1
+        fetch(url, sha1, output)
+      end
+    rescue Errno::ENOENT
+      fetch(url, sha1, output)
+    end
+  end.invoke
+
+  return output
+end
+
+def file_sha1(path)
+  digest = Digest::SHA1.new
+  fd = File.new(path, "r")
+  while true
+    begin
+      digest << fd.sysread(16384)
+    rescue EOFError
+      break
+    end
+  end
+  return digest.hexdigest
+ensure
+  fd.close if fd
+end
+
+def download(url, output)
+  uri = URI(url)
+  digest = Digest::SHA1.new
+  tmp = "#{output}.tmp"
+  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
+    request = Net::HTTP::Get.new(uri.path)
+    http.request(request) do |response|
+      # Note: Net::HTTP response codes are strings; fail on anything that is
+      # not a success or redirect.
+      fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
+      size = (response["content-length"].to_i || -1).to_f
+      count = 0
+      File.open(tmp, "w") do |fd|
+        response.read_body do |chunk|
+          fd.write(chunk)
+          digest << chunk
+          if size > 0 && $stdout.tty?
+            count += chunk.bytesize
+            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
+          end
+        end
+      end
+      $stdout.write("\r \r") if $stdout.tty?
+    end
+  end
+
+  File.rename(tmp, output)
+
+  return digest.hexdigest
+rescue SocketError => e
+  puts "Failure while downloading #{url}: #{e}"
+  raise
+ensure
+  File.unlink(tmp) if File.exist?(tmp)
+end # def download
+
+def untar(tarball, &block)
+  require "archive/tar/minitar"
+  tgz = Zlib::GzipReader.new(File.open(tarball))
+  # Pull out typesdb
+  tar = Archive::Tar::Minitar::Input.open(tgz)
+  tar.each do |entry|
+    path = block.call(entry)
+    next if path.nil?
+    parent = File.dirname(path)
+
+    mkdir_p parent unless File.directory?(parent)
+
+    # Skip this file if the output file is the same size
+    if entry.directory?
+      mkdir path unless File.directory?(path)
+    else
+      entry_mode = entry.instance_eval { @mode } & 0777
+      if File.exists?(path)
+        stat = File.stat(path)
+        # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
+        # expose headers in the entry.
+        entry_size = entry.instance_eval { @size }
+        # If file sizes are same, skip writing.
+        next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
+      end
+      puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
+      File.open(path, "w") do |fd|
+        # eof? check lets us skip empty files. Necessary because the API provided by
+        # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
+        # IO object. Something about empty files in this EntryStream causes
+        # IO.copy_stream to throw "can't convert nil into String" on JRuby
+        # TODO(sissel): File a bug about this.
+        while !entry.eof?
+          chunk = entry.read(16384)
+          fd.write(chunk)
+        end
+        #IO.copy_stream(entry, fd)
+      end
+      File.chmod(entry_mode, path)
+    end
+  end
+  tar.close
+  File.unlink(tarball) if File.file?(tarball)
+end # def untar
+
+def ungz(file)
+
+  outpath = file.gsub('.gz', '')
+  tgz = Zlib::GzipReader.new(File.open(file))
+  begin
+    File.open(outpath, "w") do |out|
+      IO::copy_stream(tgz, out)
+    end
+    File.unlink(file)
+  rescue
+    File.unlink(outpath) if File.file?(outpath)
+    raise
+  end
+  tgz.close
+end
+
+desc "Process any vendor files required for this plugin"
+task "vendor" do |task, args|
+
+  @files.each do |file|
+    download = file_fetch(file['url'], file['sha1'])
+    if download =~ /.tar.gz/
+      prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
+      untar(download) do |entry|
+        if !file['files'].nil?
+          next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
+          out = entry.full_name.split("/").last
+        end
+        File.join('vendor', out)
+      end
+    elsif download =~ /.gz/
+      ungz(download)
+    end
+  end
+
+end
data/spec/filters/grok_spec.rb
ADDED
@@ -0,0 +1,648 @@
+# encoding: utf-8
+require "spec_helper"
+require "logstash/filters/grok"
+
+describe LogStash::Filters::Grok do
+
+  describe "simple syslog line" do
+    # The logstash config goes here.
+    # At this time, only filters are supported.
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "%{SYSLOGLINE}" }
+          singles => true
+          overwrite => [ "message" ]
+        }
+      }
+    CONFIG
+
+    sample "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]" do
+      insist { subject["tags"] }.nil?
+      insist { subject["logsource"] } == "evita"
+      insist { subject["timestamp"] } == "Mar 16 00:01:25"
+      insist { subject["message"] } == "connect from camomile.cloud9.net[168.100.1.3]"
+      insist { subject["program"] } == "postfix/smtpd"
+      insist { subject["pid"] } == "1713"
+    end
+  end
+
+  describe "ietf 5424 syslog line" do
+    # The logstash config goes here.
+    # At this time, only filters are supported.
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "%{SYSLOG5424LINE}" }
+          singles => true
+        }
+      }
+    CONFIG
+
+    sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - [id1 foo=\"bar\"][id2 baz=\"something\"] Hello, syslog." do
+      insist { subject["tags"] }.nil?
+      insist { subject["syslog5424_pri"] } == "191"
+      insist { subject["syslog5424_ver"] } == "1"
+      insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+      insist { subject["syslog5424_host"] } == "paxton.local"
+      insist { subject["syslog5424_app"] } == "grokdebug"
+      insist { subject["syslog5424_proc"] } == "4123"
+      insist { subject["syslog5424_msgid"] } == nil
+      insist { subject["syslog5424_sd"] } == "[id1 foo=\"bar\"][id2 baz=\"something\"]"
+      insist { subject["syslog5424_msg"] } == "Hello, syslog."
+    end
+
+    sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug - - [id1 foo=\"bar\"] No process ID." do
+      insist { subject["tags"] }.nil?
+      insist { subject["syslog5424_pri"] } == "191"
+      insist { subject["syslog5424_ver"] } == "1"
+      insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+      insist { subject["syslog5424_host"] } == "paxton.local"
+      insist { subject["syslog5424_app"] } == "grokdebug"
+      insist { subject["syslog5424_proc"] } == nil
+      insist { subject["syslog5424_msgid"] } == nil
+      insist { subject["syslog5424_sd"] } == "[id1 foo=\"bar\"]"
+      insist { subject["syslog5424_msg"] } == "No process ID."
+    end
+
+    sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - - No structured data." do
+      insist { subject["tags"] }.nil?
+      insist { subject["syslog5424_pri"] } == "191"
+      insist { subject["syslog5424_ver"] } == "1"
+      insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+      insist { subject["syslog5424_host"] } == "paxton.local"
+      insist { subject["syslog5424_app"] } == "grokdebug"
+      insist { subject["syslog5424_proc"] } == "4123"
+      insist { subject["syslog5424_msgid"] } == nil
+      insist { subject["syslog5424_sd"] } == nil
+      insist { subject["syslog5424_msg"] } == "No structured data."
+    end
+
+    sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug - - - No PID or SD." do
+      insist { subject["tags"] }.nil?
+      insist { subject["syslog5424_pri"] } == "191"
+      insist { subject["syslog5424_ver"] } == "1"
+      insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+      insist { subject["syslog5424_host"] } == "paxton.local"
+      insist { subject["syslog5424_app"] } == "grokdebug"
+      insist { subject["syslog5424_proc"] } == nil
+      insist { subject["syslog5424_msgid"] } == nil
+      insist { subject["syslog5424_sd"] } == nil
+      insist { subject["syslog5424_msg"] } == "No PID or SD."
+    end
+
+    sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - Missing structured data." do
+      insist { subject["tags"] }.nil?
+      insist { subject["syslog5424_pri"] } == "191"
+      insist { subject["syslog5424_ver"] } == "1"
+      insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+      insist { subject["syslog5424_host"] } == "paxton.local"
+      insist { subject["syslog5424_app"] } == "grokdebug"
+      insist { subject["syslog5424_proc"] } == "4123"
+      insist { subject["syslog5424_msgid"] } == nil
+      insist { subject["syslog5424_sd"] } == nil
+      insist { subject["syslog5424_msg"] } == "Missing structured data."
+    end
+
+    sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - - Additional spaces." do
+      insist { subject["tags"] }.nil?
+      insist { subject["syslog5424_pri"] } == "191"
+      insist { subject["syslog5424_ver"] } == "1"
+      insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+      insist { subject["syslog5424_host"] } == "paxton.local"
+      insist { subject["syslog5424_app"] } == "grokdebug"
+      insist { subject["syslog5424_proc"] } == "4123"
+      insist { subject["syslog5424_msgid"] } == nil
+      insist { subject["syslog5424_sd"] } == nil
+      insist { subject["syslog5424_msg"] } == "Additional spaces."
+    end
+
+    sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - Additional spaces and missing SD." do
+      insist { subject["tags"] }.nil?
+      insist { subject["syslog5424_pri"] } == "191"
+      insist { subject["syslog5424_ver"] } == "1"
+      insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+      insist { subject["syslog5424_host"] } == "paxton.local"
+      insist { subject["syslog5424_app"] } == "grokdebug"
+      insist { subject["syslog5424_proc"] } == "4123"
+      insist { subject["syslog5424_msgid"] } == nil
+      insist { subject["syslog5424_sd"] } == nil
+      insist { subject["syslog5424_msg"] } == "Additional spaces and missing SD."
+    end
+
+    sample "<30>1 2014-04-04T16:44:07+02:00 osctrl01 dnsmasq-dhcp 8048 - - Appname contains a dash" do
+      insist { subject["tags"] }.nil?
+      insist { subject["syslog5424_pri"] } == "30"
+      insist { subject["syslog5424_ver"] } == "1"
+      insist { subject["syslog5424_ts"] } == "2014-04-04T16:44:07+02:00"
+      insist { subject["syslog5424_host"] } == "osctrl01"
+      insist { subject["syslog5424_app"] } == "dnsmasq-dhcp"
+      insist { subject["syslog5424_proc"] } == "8048"
+      insist { subject["syslog5424_msgid"] } == nil
+      insist { subject["syslog5424_sd"] } == nil
+      insist { subject["syslog5424_msg"] } == "Appname contains a dash"
+    end
+
+    sample "<30>1 2014-04-04T16:44:07+02:00 osctrl01 - 8048 - - Appname is nil" do
+      insist { subject["tags"] }.nil?
+      insist { subject["syslog5424_pri"] } == "30"
+      insist { subject["syslog5424_ver"] } == "1"
+      insist { subject["syslog5424_ts"] } == "2014-04-04T16:44:07+02:00"
+      insist { subject["syslog5424_host"] } == "osctrl01"
+      insist { subject["syslog5424_app"] } == nil
+      insist { subject["syslog5424_proc"] } == "8048"
+      insist { subject["syslog5424_msgid"] } == nil
+      insist { subject["syslog5424_sd"] } == nil
+      insist { subject["syslog5424_msg"] } == "Appname is nil"
+    end
+  end
+
+  describe "parsing an event with multiple messages (array of strings)", :if => false do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "(?:hello|world) %{NUMBER}" }
+          named_captures_only => false
+        }
+      }
+    CONFIG
+
+    sample("message" => [ "hello 12345", "world 23456" ]) do
+      insist { subject["NUMBER"] } == [ "12345", "23456" ]
+    end
+  end
+
+  describe "coercing matched values" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "%{NUMBER:foo:int} %{NUMBER:bar:float}" }
+          singles => true
+        }
+      }
+    CONFIG
+
+    sample "400 454.33" do
+      insist { subject["foo"] } == 400
+      insist { subject["foo"] }.is_a?(Fixnum)
+      insist { subject["bar"] } == 454.33
+      insist { subject["bar"] }.is_a?(Float)
+    end
+  end
+
+  describe "in-line pattern definitions" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "%{FIZZLE=\\d+}" }
+          named_captures_only => false
+          singles => true
+        }
+      }
+    CONFIG
+
+    sample "hello 1234" do
+      insist { subject["FIZZLE"] } == "1234"
+    end
+  end
+
+  describe "processing selected fields" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "%{WORD:word}" }
+          match => { "examplefield" => "%{NUMBER:num}" }
+          break_on_match => false
+          singles => true
+        }
+      }
+    CONFIG
+
+    sample("message" => "hello world", "examplefield" => "12345") do
+      insist { subject["examplefield"] } == "12345"
+      insist { subject["word"] } == "hello"
+    end
+  end
+
+  describe "adding fields on match" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "matchme %{NUMBER:fancy}" }
+          singles => true
+          add_field => [ "new_field", "%{fancy}" ]
+        }
+      }
+    CONFIG
+
+    sample "matchme 1234" do
+      insist { subject["tags"] }.nil?
+      insist { subject["new_field"] } == "1234"
+    end
+
+    sample "this will not be matched" do
+      insist { subject["tags"] }.include?("_grokparsefailure")
+      reject { subject }.include?("new_field")
+    end
+  end
+
+  context "empty fields" do
+    describe "drop by default" do
+      config <<-CONFIG
+        filter {
+          grok {
+            match => { "message" => "1=%{WORD:foo1} *(2=%{WORD:foo2})?" }
+          }
+        }
+      CONFIG
+
+      sample "1=test" do
+        insist { subject["tags"] }.nil?
+        insist { subject }.include?("foo1")
+
+        # Since 'foo2' was not captured, it must not be present in the event.
+        reject { subject }.include?("foo2")
+      end
+    end
+
+    describe "keep if keep_empty_captures is true" do
+      config <<-CONFIG
+        filter {
+          grok {
+            match => { "message" => "1=%{WORD:foo1} *(2=%{WORD:foo2})?" }
+            keep_empty_captures => true
+          }
+        }
+      CONFIG
+
+      sample "1=test" do
+        insist { subject["tags"] }.nil?
+        # use .to_hash for this test, for now, because right now
+        # the Event.include? returns false for missing fields as well
+        # as for fields with nil values.
+        insist { subject.to_hash }.include?("foo1")
+        insist { subject.to_hash }.include?("foo2")
+      end
+    end
+  end
+
+  describe "when named_captures_only == false" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "Hello %{WORD}. %{WORD:foo}" }
+          named_captures_only => false
+          singles => true
+        }
+      }
+    CONFIG
+
+    sample "Hello World, yo!" do
+      insist { subject }.include?("WORD")
+      insist { subject["WORD"] } == "World"
+      insist { subject }.include?("foo")
+      insist { subject["foo"] } == "yo"
+    end
+  end
+
+  describe "using oniguruma named captures (?<name>regex)" do
+    context "plain regexp" do
+      config <<-'CONFIG'
+        filter {
+          grok {
+            singles => true
+            match => { "message" => "(?<foo>\w+)" }
+          }
+        }
+      CONFIG
+      sample "hello world" do
+        insist { subject["tags"] }.nil?
+        insist { subject["foo"] } == "hello"
+      end
+    end
+
+    context "grok patterns" do
+      config <<-'CONFIG'
+        filter {
+          grok {
+            singles => true
+            match => { "message" => "(?<timestamp>%{DATE_EU} %{TIME})" }
+          }
+        }
+      CONFIG
+
+      sample "fancy 12-12-12 12:12:12" do
+        insist { subject["tags"] }.nil?
+        insist { subject["timestamp"] } == "12-12-12 12:12:12"
+      end
+    end
+  end
+
+  describe "grok on integer types" do
+    config <<-'CONFIG'
+      filter {
+        grok {
+          match => { "status" => "^403$" }
+          add_tag => "four_oh_three"
+        }
+      }
+    CONFIG
+
+    sample("status" => 403) do
+      reject { subject["tags"] }.include?("_grokparsefailure")
+      insist { subject["tags"] }.include?("four_oh_three")
+    end
+  end
+
+  describe "grok on float types" do
+    config <<-'CONFIG'
+      filter {
+        grok {
+          match => { "version" => "^1.0$" }
+          add_tag => "one_point_oh"
+        }
+      }
+    CONFIG
+
+    sample("version" => 1.0) do
+      reject { subject["tags"] }.include?("_grokparsefailure")
+      insist { subject["tags"] }.include?("one_point_oh")
+    end
+  end
+
+  describe "grok on %{LOGLEVEL}" do
+    config <<-'CONFIG'
+      filter {
+        grok {
+          pattern => "%{LOGLEVEL:level}: error!"
+        }
+      }
+    CONFIG
+
+    log_level_names = %w(
+      trace Trace TRACE
+      debug Debug DEBUG
+      notice Notice NOTICE
+      info Info INFO
+      warn warning Warn Warning WARN WARNING
+      err error Err Error ERR ERROR
+      crit critical Crit Critical CRIT CRITICAL
+      fatal Fatal FATAL
+      severe Severe SEVERE
+      emerg emergency Emerg Emergency EMERG EMERGENCY
+    )
+    log_level_names.each do |level_name|
+      sample "#{level_name}: error!" do
+        insist { subject['level'] } == level_name
+      end
+    end
+  end
+
+  describe "tagging on failure" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "matchme %{NUMBER:fancy}" }
+          tag_on_failure => false
+        }
+      }
+    CONFIG
+
+    sample "matchme 1234" do
+      insist { subject["tags"] }.nil?
+    end
+
+    sample "this will not be matched" do
+      insist { subject["tags"] }.include?("false")
+    end
+  end
+
+  describe "captures named fields even if the whole text matches" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "%{DATE_EU:stimestamp}" }
+          singles => true
+        }
+      }
+    CONFIG
+
+    sample "11/01/01" do
+      insist { subject["stimestamp"] } == "11/01/01"
+    end
+  end
+
+  describe "allow dashes in capture names" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "%{WORD:foo-bar}" }
+          singles => true
+        }
+      }
+    CONFIG
+
+    sample "hello world" do
+      insist { subject["foo-bar"] } == "hello"
+    end
+  end
+
+  describe "performance test", :performance => true do
+    event_count = 100000
+    min_rate = 2000
+
+    max_duration = event_count / min_rate
+    input = "Nov 24 01:29:01 -0800"
+    config <<-CONFIG
+      input {
+        generator {
+          count => #{event_count}
+          message => "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]"
+        }
+      }
+      filter {
+        grok {
+          match => { "message" => "%{SYSLOGLINE}" }
+          singles => true
+          overwrite => [ "message" ]
+        }
+      }
+      output { null { } }
+    CONFIG
+
+    2.times do
+      start = Time.now
+      agent do
+        duration = (Time.now - start)
+        puts "filters/grok parse rate: #{"%02.0f/sec" % (event_count / duration)}, elapsed: #{duration}s"
+        insist { duration } < max_duration
+      end
+    end
+  end
+
+  describe "singles with duplicate-named fields" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "%{INT:foo}|%{WORD:foo}" }
+          singles => true
+        }
+      }
+    CONFIG
+
+    sample "hello world" do
+      insist { subject["foo"] }.is_a?(String)
+    end
+
+    sample "123 world" do
+      insist { subject["foo"] }.is_a?(String)
+    end
+  end
+
+  describe "break_on_match default should be true and first match should exit filter" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "%{INT:foo}"
+                     "somefield" => "%{INT:bar}" }
+        }
+      }
+    CONFIG
+
+    sample("message" => "hello world 123", "somefield" => "testme abc 999") do
+      insist { subject["foo"] } == "123"
+      insist { subject["bar"] }.nil?
+    end
+  end
+
+  describe "break_on_match when set to false should try all patterns" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "%{INT:foo}"
+                     "somefield" => "%{INT:bar}" }
+          break_on_match => false
+        }
+      }
+    CONFIG
+
+    sample("message" => "hello world 123", "somefield" => "testme abc 999") do
+      insist { subject["foo"] } == "123"
+      insist { subject["bar"] } == "999"
+    end
+  end
+
+  describe "LOGSTASH-1547 - break_on_match should work on fields with multiple patterns" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => ["%{GREEDYDATA:name1}beard", "tree%{GREEDYDATA:name2}"] }
+          break_on_match => false
+        }
+      }
+    CONFIG
+
+    sample "treebranch" do
+      insist { subject["name2"] } == "branch"
+    end
+
+    sample "bushbeard" do
+      insist { subject["name1"] } == "bush"
+    end
+
+    sample "treebeard" do
+      insist { subject["name1"] } == "tree"
+      insist { subject["name2"] } == "beard"
+    end
+  end
+
+  describe "break_on_match default for array input with single grok pattern" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => "%{INT:foo}" }
+        }
+      }
+    CONFIG
+
+    # array input --
+    sample("message" => ["hello world 123", "line 23"]) do
+      insist { subject["foo"] } == ["123", "23"]
+      insist { subject["tags"] }.nil?
+    end
+
+    # array input, one of them matches
+    sample("message" => ["hello world 123", "abc"]) do
+      insist { subject["foo"] } == "123"
+      insist { subject["tags"] }.nil?
+    end
+  end
+
+  describe "break_on_match = true (default) for array input with multiple grok pattern" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => ["%{INT:foo}", "%{WORD:bar}"] }
+        }
+      }
+    CONFIG
+
+    # array input --
+    sample("message" => ["hello world 123", "line 23"]) do
+      insist { subject["foo"] } == ["123", "23"]
+      insist { subject["bar"] }.nil?
+      insist { subject["tags"] }.nil?
+    end
+
+    # array input, one of them matches
+    sample("message" => ["hello world", "line 23"]) do
+      insist { subject["bar"] } == "hello"
+      insist { subject["foo"] } == "23"
+      insist { subject["tags"] }.nil?
+    end
+  end
+
+  describe "break_on_match = false for array input with multiple grok pattern" do
+    config <<-CONFIG
+      filter {
+        grok {
+          match => { "message" => ["%{INT:foo}", "%{WORD:bar}"] }
+          break_on_match => false
+        }
+      }
+    CONFIG
+
+    # array input --
+    sample("message" => ["hello world 123", "line 23"]) do
+      insist { subject["foo"] } == ["123", "23"]
+      insist { subject["bar"] } == ["hello", "line"]
+      insist { subject["tags"] }.nil?
+    end
+
+    # array input, one of them matches
+    sample("message" => ["hello world", "line 23"]) do
+      insist { subject["bar"] } == ["hello", "line"]
+      insist { subject["foo"] } == "23"
+      insist { subject["tags"] }.nil?
+    end
+  end
+
+  describe "grok with unicode" do
+    config <<-CONFIG
+      filter {
+        grok {
+          #pattern => "<%{POSINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}"
+          pattern => "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(:?)(?:\\[%{GREEDYDATA:syslog_pid}\\])?(:?) %{GREEDYDATA:syslog_message}"
+        }
+      }
+    CONFIG
+
+    sample "<22>Jan 4 07:50:46 mailmaster postfix/policy-spf[9454]: : SPF permerror (Junk encountered in record 'v=spf1 mx a:mail.domain.no ip4:192.168.0.4 �all'): Envelope-from: email@domain.no" do
+      insist { subject["tags"] }.nil?
+      insist { subject["syslog_pri"] } == "22"
+      insist { subject["syslog_program"] } == "postfix/policy-spf"
+    end
+  end
+
+
+end
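
The assertions throughout the spec use the insist gem rather than RSpec's expect syntax; a minimal sketch of the three forms used above:

# Sketch of the insist/reject assertion style used in the spec above.
require "insist"

insist { 1 + 1 } == 2             # equality: raises Insist::Failure on mismatch
insist { "grok" }.is_a?(String)   # predicate: passes if the block result satisfies it
reject { [1, 2] }.include?(3)     # negated predicate: passes if it does NOT hold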
metadata
ADDED
@@ -0,0 +1,104 @@
+--- !ruby/object:Gem::Specification
+name: logstash-filter-grok
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Elasticsearch
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2014-11-05 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: logstash
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 1.4.0
+    - - <
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 1.4.0
+    - - <
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+- !ruby/object:Gem::Dependency
+  name: jls-grok
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '='
+      - !ruby/object:Gem::Version
+        version: 0.11.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '='
+      - !ruby/object:Gem::Version
+        version: 0.11.0
+- !ruby/object:Gem::Dependency
+  name: logstash-patterns-core
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+description: Grok is currently the best way in logstash to parse crappy unstructured
+  log data into something structured and queryable.
+email: richard.pijnenburg@elasticsearch.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- LICENSE
+- Rakefile
+- lib/logstash/filters/grok.rb
+- logstash-filter-grok.gemspec
+- rakelib/publish.rake
+- rakelib/vendor.rake
+- spec/filters/grok_spec.rb
+homepage: http://logstash.net/
+licenses:
+- Apache License (2.0)
+metadata:
+  logstash_plugin: 'true'
+  group: filter
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.4.1
+signing_key:
+specification_version: 4
+summary: Parse arbitrary text and structure it.
+test_files:
+- spec/filters/grok_spec.rb