logstash-filter-grok 0.1.0

checksums.yaml ADDED
@@ -0,0 +1,15 @@
+ ---
+ !binary "U0hBMQ==":
+   metadata.gz: !binary |-
+     ZDZkNTcxMWY2ZWE0ZGMyYTczNGYzYzRjNDdmMDI4NzE5N2IwN2Q5Mg==
+   data.tar.gz: !binary |-
+     YTJlZDBhZDg2ODViMzNkZjNhMjZmZDc2OTQ2MTFlYTM1MTgyOGNiNA==
+ SHA512:
+   metadata.gz: !binary |-
+     ZDJlZTRlYzBmYmFjYmRhNzA1OGE5ZTljN2ZkZGNkNzQ0ZTlhNDI0MzNmMTcx
+     NjFlNjU5MDgwYjI4ZTY3MTkzMWM5ODFmMjAyZGFlMWYzZTI3YjhjMWU0OGNh
+     NTFjYzg2NjRiODNmOWM1NTY0ZGJhMzRlZTdkY2QzN2ZlYjU0OTM=
+   data.tar.gz: !binary |-
+     ZDk4MGU3MzIzNGJkYTk3OGRhYjhiNjIyNTYzYzg1NGU2YzU3ZDQzNGNiZWMw
+     NThhNTBhMDczNmQ0OTM1NTIyYTRmZjkzZTFmNTcxYzliMWVmM2JiNTc2MTVl
+     YzhiMDNjM2RlNTI2MjU0OTdmZmE5NzljYmM0NTRhMjg1YmFiYjY=
data/.gitignore ADDED
@@ -0,0 +1,4 @@
+ *.gem
+ Gemfile.lock
+ .bundle
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,3 @@
+ source 'http://rubygems.org'
+ gem 'rake'
+ gem 'gem_publisher'
data/LICENSE ADDED
@@ -0,0 +1,13 @@
+ Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ @files = []
+
+ task :default do
+   system("rake -T")
+ end
+
data/lib/logstash/filters/grok.rb ADDED
@@ -0,0 +1,363 @@
+ # encoding: utf-8
+ require "logstash/filters/base"
+ require "logstash/namespace"
+ require "logstash/environment"
+ require "logstash/patterns/core"
+ require "set"
+
+ # Parse arbitrary text and structure it.
+ #
+ # Grok is currently the best way in logstash to parse crappy unstructured log
+ # data into something structured and queryable.
+ #
+ # This tool is perfect for syslog logs, apache and other webserver logs, mysql
+ # logs, and in general, any log format that is generally written for humans
+ # and not computer consumption.
+ #
+ # Logstash ships with about 120 patterns by default. You can find them here:
+ # <https://github.com/logstash/logstash/tree/v%VERSION%/patterns>. You can add
+ # your own trivially. (See the patterns_dir setting.)
+ #
+ # If you need help building patterns to match your logs, you will find the
+ # <http://grokdebug.herokuapp.com> tool quite useful!
+ #
+ # #### Grok Basics
+ #
+ # Grok works by combining text patterns into something that matches your
+ # logs.
+ #
+ # The syntax for a grok pattern is `%{SYNTAX:SEMANTIC}`.
+ #
+ # The `SYNTAX` is the name of the pattern that will match your text. For
+ # example, "3.44" will be matched by the NUMBER pattern and "55.3.244.1" will
+ # be matched by the IP pattern. The syntax is how you match.
+ #
+ # The `SEMANTIC` is the identifier you give to the piece of text being matched.
+ # For example, "3.44" could be the duration of an event, so you could call it
+ # simply 'duration'. Further, a string "55.3.244.1" might identify the 'client'
+ # making a request.
+ #
+ # For the above example, your grok filter would look something like this:
+ #
+ #     %{NUMBER:duration} %{IP:client}
+ #
+ # Optionally you can add a data type conversion to your grok pattern. By default
+ # all semantics are saved as strings. If you wish to convert a semantic's data type,
+ # for example change a string to an integer, then suffix it with the target data type.
+ # For example, `%{NUMBER:num:int}` converts the 'num' semantic from a string to an
+ # integer. Currently the only supported conversions are `int` and `float`.
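+ # As a brief illustrative sketch (the 'millis' field name is made up for
+ # this example, not taken from any shipped pattern), a filter applying
+ # such a conversion might look like:
+ #
+ #     filter {
+ #       grok {
+ #         match => { "message" => "took %{NUMBER:millis:int} ms" }
+ #       }
+ #     }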
+ #
+ # #### Example
+ #
+ # With that idea of a syntax and semantic, we can pull out useful fields from a
+ # sample log like this fictional http request log:
+ #
+ #     55.3.244.1 GET /index.html 15824 0.043
+ #
+ # The pattern for this could be:
+ #
+ #     %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
+ #
+ # For a more realistic example, let's read these logs from a file:
+ #
+ #     input {
+ #       file {
+ #         path => "/var/log/http.log"
+ #       }
+ #     }
+ #     filter {
+ #       grok {
+ #         match => { "message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" }
+ #       }
+ #     }
+ #
+ # After the grok filter, the event will have a few extra fields in it:
+ #
+ # * client: 55.3.244.1
+ # * method: GET
+ # * request: /index.html
+ # * bytes: 15824
+ # * duration: 0.043
+ #
+ # #### Regular Expressions
+ #
+ # Grok sits on top of regular expressions, so any regular expressions are valid
+ # in grok as well. The regular expression library is Oniguruma, and you can see
+ # the full supported regexp syntax [on the Oniguruma
+ # site](http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt).
+ #
+ # #### Custom Patterns
+ #
+ # Sometimes logstash doesn't have a pattern you need. For this, you have
+ # a few options.
+ #
+ # First, you can use the Oniguruma syntax for 'named capture', which will
+ # let you match a piece of text and save it as a field:
+ #
+ #     (?<field_name>the pattern here)
+ #
+ # For example, postfix logs have a 'queue id' that is a 10- or 11-character
+ # hexadecimal value. I can capture that easily like this:
+ #
+ #     (?<queue_id>[0-9A-F]{10,11})
+ #
+ # Alternately, you can create a custom patterns file.
+ #
+ # * Create a directory called `patterns` with a file in it called `extra`
+ #   (the file name doesn't matter, but name it meaningfully for yourself)
+ # * In that file, write the pattern you need as the pattern name, a space, then
+ #   the regexp for that pattern.
+ #
+ # For example, doing the postfix queue id example as above:
+ #
+ #     # contents of ./patterns/postfix:
+ #     POSTFIX_QUEUEID [0-9A-F]{10,11}
+ #
+ # Then use the `patterns_dir` setting in this plugin to tell logstash where
+ # your custom patterns directory is. Here's a full example with a sample log:
+ #
+ #     Jan 1 06:25:43 mailserver14 postfix/cleanup[21403]: BEF25A72965: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
+ #
+ #     filter {
+ #       grok {
+ #         patterns_dir => "./patterns"
+ #         match => { "message" => "%{SYSLOGBASE} %{POSTFIX_QUEUEID:queue_id}: %{GREEDYDATA:syslog_message}" }
+ #       }
+ #     }
+ #
+ # The above will match and result in the following fields:
+ #
+ # * timestamp: Jan 1 06:25:43
+ # * logsource: mailserver14
+ # * program: postfix/cleanup
+ # * pid: 21403
+ # * queue_id: BEF25A72965
+ # * syslog_message: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
+ #
+ # The `timestamp`, `logsource`, `program`, and `pid` fields come from the
+ # SYSLOGBASE pattern, which itself is defined by other patterns.
+ class LogStash::Filters::Grok < LogStash::Filters::Base
+   config_name "grok"
+   milestone 3
+
+   # Specify a pattern to parse with. This will match the 'message' field.
+   #
+   # If you want to match fields other than 'message', use the 'match' setting.
+   # Multiple patterns are fine.
+   config :pattern, :validate => :array, :deprecated => "You should use this instead: match => { \"message\" => \"your pattern here\" }"
+
+   # A hash of field => pattern matches.
+   #
+   # For example:
+   #
+   #     filter {
+   #       grok { match => { "message" => "Duration: %{NUMBER:duration}" } }
+   #     }
+   #
+   # Alternatively, using the old array syntax:
+   #
+   #     filter {
+   #       grok { match => [ "message", "Duration: %{NUMBER:duration}" ] }
+   #     }
+   #
+   config :match, :validate => :hash, :default => {}
+
+   #
+   # logstash ships by default with a bunch of patterns, so you don't
+   # necessarily need to define this yourself unless you are adding additional
+   # patterns.
+   #
+   # Pattern files are plain text with the format:
+   #
+   #     NAME PATTERN
+   #
+   # For example:
+   #
+   #     NUMBER \d+
+   config :patterns_dir, :validate => :array, :default => []
+
+   # Drop if matched. Note: this feature may not stay. It is preferable to combine
+   # grok + grep filters to do parsing + dropping.
+   config :drop_if_match, :validate => :boolean, :default => false
+
+   # Break on first match. The first successful match by grok will result in the
+   # filter being finished. If you want grok to try all patterns (maybe you are
+   # parsing different things), then set this to false.
+   config :break_on_match, :validate => :boolean, :default => true
+
+   # If true, only store named captures from grok.
+   config :named_captures_only, :validate => :boolean, :default => true
+
+   # If true, keep empty captures as event fields.
+   config :keep_empty_captures, :validate => :boolean, :default => false
+
+   # If true, make single-value fields simply that value, not an array
+   # containing that one value.
+   config :singles, :validate => :boolean, :default => true, :deprecated => "This behavior is the default now, you don't need to set it."
+
+   # Append values to the 'tags' field when there has been no
+   # successful match.
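+   # As a sketch, a config that appends a second, custom tag next to the
+   # default one might look like this (the 'needs_review' tag name is
+   # purely illustrative):
+   #
+   #     filter {
+   #       grok {
+   #         match          => { "message" => "%{IP:client} %{WORD:method}" }
+   #         tag_on_failure => [ "_grokparsefailure", "needs_review" ]
+   #       }
+   #     }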
+   config :tag_on_failure, :validate => :array, :default => ["_grokparsefailure"]
+
+   # The fields to overwrite.
+   #
+   # This allows you to overwrite a value in a field that already exists.
+   #
+   # For example, if you have a syslog line in the 'message' field, you can
+   # overwrite the 'message' field with part of the match like so:
+   #
+   #     filter {
+   #       grok {
+   #         match => { "message" => "%{SYSLOGBASE} %{DATA:message}" }
+   #         overwrite => [ "message" ]
+   #       }
+   #     }
+   #
+   # In this case, a line like "May 29 16:37:11 sadness logger: hello world"
+   # will be parsed and 'hello world' will overwrite the original message.
+   config :overwrite, :validate => :array, :default => []
+
+   # Detect if we are running from a jarfile, pick the right path.
+   @@patterns_path ||= Set.new
+   #@@patterns_path += [LogStash::Environment.pattern_path("*")]
+   @@patterns_path += [LogStash::Patterns::Core.path]
+
+   public
+   def initialize(params)
+     super(params)
+     @match["message"] ||= []
+     @match["message"] += @pattern if @pattern # the config 'pattern' value (array)
+     # a cache of capture name handler methods.
+     @handlers = {}
+   end
+
+   public
+   def register
+     require "grok-pure" # rubygem 'jls-grok'
+
+     @patternfiles = []
+
+     # Have @@patterns_path show first. Last-in pattern definitions win; this
+     # will let folks redefine built-in patterns at runtime.
+     @patterns_dir = @@patterns_path.to_a + @patterns_dir
+     @logger.info? and @logger.info("Grok patterns path", :patterns_dir => @patterns_dir)
+     @patterns_dir.each do |path|
+       if File.directory?(path)
+         path = File.join(path, "*")
+       end
+
+       Dir.glob(path).each do |file|
+         @logger.info? and @logger.info("Grok loading patterns from file", :path => file)
+         @patternfiles << file
+       end
+     end
+
+     @patterns = Hash.new { |h,k| h[k] = [] }
+
+     @logger.info? and @logger.info("Match data", :match => @match)
+
+     @match.each do |field, patterns|
+       patterns = [patterns] if patterns.is_a?(String)
+
+       @logger.info? and @logger.info("Grok compile", :field => field, :patterns => patterns)
+       patterns.each do |pattern|
+         @logger.debug? and @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern)
+         grok = Grok.new
+         grok.logger = @logger unless @logger.nil?
+         add_patterns_from_files(@patternfiles, grok)
+         grok.compile(pattern, @named_captures_only)
+         @patterns[field] << grok
+       end
+     end # @match.each
+   end # def register
+
+   public
+   def filter(event)
+     return unless filter?(event)
+
+     matched = false
+     done = false
+
+     @logger.debug? and @logger.debug("Running grok filter", :event => event)
+     @patterns.each do |field, groks|
+       if match(groks, field, event)
+         matched = true
+         break if @break_on_match
+       end
+       #break if done
+     end # @patterns.each
+
+     if matched
+       filter_matched(event)
+     else
+       # Tag this event if we can't parse it. We can use this later to
+       # reparse+reindex logs if we improve the patterns given.
+       @tag_on_failure.each do |tag|
+         event["tags"] ||= []
+         event["tags"] << tag unless event["tags"].include?(tag)
+       end
+     end
+
+     @logger.debug? and @logger.debug("Event now: ", :event => event)
+   end # def filter
+
+   private
+   def match(groks, field, event)
+     input = event[field]
+     if input.is_a?(Array)
+       success = false
+       input.each do |input|
+         success |= match_against_groks(groks, input, event)
+       end
+       return success
+     else
+       return match_against_groks(groks, input, event)
+     end
+   rescue StandardError => e
+     @logger.warn("Grok regexp threw exception", :exception => e.message)
+   end
+
+   private
+   def match_against_groks(groks, input, event)
+     matched = false
+     groks.each do |grok|
+       # Convert anything else to string (number, hash, etc.)
+       matched = grok.match_and_capture(input.to_s) do |field, value|
+         matched = true
+         handle(field, value, event)
+       end
+       break if matched and @break_on_match
+     end
+     return matched
+   end
+
+   private
+   def handle(field, value, event)
+     return if (value.nil? || (value.is_a?(String) && value.empty?)) unless @keep_empty_captures
+
+     if @overwrite.include?(field)
+       event[field] = value
+     else
+       v = event[field]
+       if v.nil?
+         event[field] = value
+       elsif v.is_a?(Array)
+         event[field] << value
+       elsif v.is_a?(String)
+         # Promote to array since we aren't overwriting.
+         event[field] = [v, value]
+       end
+     end
+   end
+
+   private
+   def add_patterns_from_files(paths, grok)
+     paths.each do |path|
+       if !File.exists?(path)
+         raise "Grok pattern file does not exist: #{path}"
+       end
+       grok.add_patterns_from_file(path)
+     end
+   end # def add_patterns_from_files
+
+ end # class LogStash::Filters::Grok
data/logstash-filter-grok.gemspec ADDED
@@ -0,0 +1,29 @@
+ Gem::Specification.new do |s|
+
+   s.name          = 'logstash-filter-grok'
+   s.version       = '0.1.0'
+   s.licenses      = ['Apache License (2.0)']
+   s.summary       = "Parse arbitrary text and structure it."
+   s.description   = "Grok is currently the best way in logstash to parse crappy unstructured log data into something structured and queryable."
+   s.authors       = ["Elasticsearch"]
+   s.email         = 'richard.pijnenburg@elasticsearch.com'
+   s.homepage      = "http://logstash.net/"
+   s.require_paths = ["lib"]
+
+   # Files
+   s.files = `git ls-files`.split($\)
+
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { "logstash_plugin" => "true", "group" => "filter" }
+
+   # Gem dependencies
+   s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+
+   s.add_runtime_dependency 'jls-grok', ['0.11.0']
+   s.add_runtime_dependency 'logstash-patterns-core'
+
+ end
+
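With the gemspec above, building and installing a local copy of the plugin follows the standard RubyGems workflow (a sketch using generic `gem` commands, not something this changeset itself defines; the output filename follows from the name and version fields):

    gem build logstash-filter-grok.gemspec
    gem install logstash-filter-grok-0.1.0.gem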
data/rakelib/publish.rake ADDED
@@ -0,0 +1,9 @@
+ require "gem_publisher"
+
+ desc "Publish gem to RubyGems.org"
+ task :publish_gem do |t|
+   gem_file = Dir.glob(File.expand_path('../*.gemspec', File.dirname(__FILE__))).first
+   gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
+   puts "Published #{gem}" if gem
+ end
+
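Because this task lives under rakelib/, Rake loads it automatically, so publishing is a single invocation (assuming the gem_publisher gem from the Gemfile is installed and RubyGems credentials are configured):

    rake publish_gem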
data/rakelib/vendor.rake ADDED
@@ -0,0 +1,169 @@
+ require "net/http"
+ require "uri"
+ require "digest/sha1"
+
+ def vendor(*args)
+   return File.join("vendor", *args)
+ end
+
+ directory "vendor/" => ["vendor"] do |task, args|
+   mkdir task.name
+ end
+
+ def fetch(url, sha1, output)
+
+   puts "Downloading #{url}"
+   actual_sha1 = download(url, output)
+
+   if actual_sha1 != sha1
+     fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
+   end
+ end # def fetch
+
+ def file_fetch(url, sha1)
+   filename = File.basename(URI(url).path)
+   output = "vendor/#{filename}"
+   task output => [ "vendor/" ] do
+     begin
+       actual_sha1 = file_sha1(output)
+       if actual_sha1 != sha1
+         fetch(url, sha1, output)
+       end
+     rescue Errno::ENOENT
+       fetch(url, sha1, output)
+     end
+   end.invoke
+
+   return output
+ end
+
+ def file_sha1(path)
+   digest = Digest::SHA1.new
+   fd = File.new(path, "r")
+   while true
+     begin
+       digest << fd.sysread(16384)
+     rescue EOFError
+       break
+     end
+   end
+   return digest.hexdigest
+ ensure
+   fd.close if fd
+ end
+
+ def download(url, output)
+   uri = URI(url)
+   digest = Digest::SHA1.new
+   tmp = "#{output}.tmp"
+   Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
+     request = Net::HTTP::Get.new(uri.path)
+     http.request(request) do |response|
+       # Net::HTTP reports the status code as a string; anything other than
+       # a success or redirect is a failed fetch.
+       fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
+       size = (response["content-length"].to_i || -1).to_f
+       count = 0
+       File.open(tmp, "w") do |fd|
+         response.read_body do |chunk|
+           fd.write(chunk)
+           digest << chunk
+           if size > 0 && $stdout.tty?
+             count += chunk.bytesize
+             $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
+           end
+         end
+       end
+       $stdout.write("\r \r") if $stdout.tty?
+     end
+   end
+
+   File.rename(tmp, output)
+
+   return digest.hexdigest
+ rescue SocketError => e
+   puts "Failure while downloading #{url}: #{e}"
+   raise
+ ensure
+   File.unlink(tmp) if File.exist?(tmp)
+ end # def download
+
+ def untar(tarball, &block)
+   require "archive/tar/minitar"
+   tgz = Zlib::GzipReader.new(File.open(tarball))
+   # Pull out typesdb
+   tar = Archive::Tar::Minitar::Input.open(tgz)
+   tar.each do |entry|
+     path = block.call(entry)
+     next if path.nil?
+     parent = File.dirname(path)
+
+     mkdir_p parent unless File.directory?(parent)
+
+     # Skip this file if the output file is the same size
+     if entry.directory?
+       mkdir path unless File.directory?(path)
+     else
+       entry_mode = entry.instance_eval { @mode } & 0777
+       if File.exists?(path)
+         stat = File.stat(path)
+         # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
+         # expose headers in the entry.
+         entry_size = entry.instance_eval { @size }
+         # If file sizes are same, skip writing.
+         next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
+       end
+       puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
+       File.open(path, "w") do |fd|
+         # eof? check lets us skip empty files. Necessary because the API provided by
+         # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
+         # IO object. Something about empty files in this EntryStream causes
+         # IO.copy_stream to throw "can't convert nil into String" on JRuby.
+         # TODO(sissel): File a bug about this.
+         while !entry.eof?
+           chunk = entry.read(16384)
+           fd.write(chunk)
+         end
+         #IO.copy_stream(entry, fd)
+       end
+       File.chmod(entry_mode, path)
+     end
+   end
+   tar.close
+   File.unlink(tarball) if File.file?(tarball)
+ end # def untar
+
+ def ungz(file)
+
+   outpath = file.gsub('.gz', '')
+   tgz = Zlib::GzipReader.new(File.open(file))
+   begin
+     File.open(outpath, "w") do |out|
+       IO::copy_stream(tgz, out)
+     end
+     File.unlink(file)
+   rescue
+     File.unlink(outpath) if File.file?(outpath)
+     raise
+   end
+   tgz.close
+ end
+
+ desc "Process any vendor files required for this plugin"
+ task "vendor" do |task, args|
+
+   @files.each do |file|
+     download = file_fetch(file['url'], file['sha1'])
+     if download =~ /\.tar\.gz$/
+       prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
+       untar(download) do |entry|
+         out = entry.full_name.split("/").last
+         if !file['files'].nil?
+           next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
+         end
+         File.join('vendor', out)
+       end
+     elsif download =~ /\.gz$/
+       ungz(download)
+     end
+   end
+
+ end
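The "vendor" task iterates over the @files list initialized in the Rakefile. This gem ships that list empty, so purely as a hedged sketch of the entry shape the task reads (the URL, checksum, and file list below are invented placeholders):

    # hypothetical entry; this gem's Rakefile leaves @files empty
    @files = [
      {
        'url'   => 'https://example.com/some-patterns.tar.gz',
        'sha1'  => '0123456789abcdef0123456789abcdef01234567',
        'files' => [ '/patterns/extra' ],  # optional whitelist of tarball entries
      },
    ]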
data/spec/filters/grok_spec.rb ADDED
@@ -0,0 +1,648 @@
+ # encoding: utf-8
+ require "spec_helper"
+ require "logstash/filters/grok"
+
+ describe LogStash::Filters::Grok do
+
+   describe "simple syslog line" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{SYSLOGLINE}" }
+           singles => true
+           overwrite => [ "message" ]
+         }
+       }
+     CONFIG
+
+     sample "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]" do
+       insist { subject["tags"] }.nil?
+       insist { subject["logsource"] } == "evita"
+       insist { subject["timestamp"] } == "Mar 16 00:01:25"
+       insist { subject["message"] } == "connect from camomile.cloud9.net[168.100.1.3]"
+       insist { subject["program"] } == "postfix/smtpd"
+       insist { subject["pid"] } == "1713"
+     end
+   end
+
+   describe "ietf 5424 syslog line" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{SYSLOG5424LINE}" }
+           singles => true
+         }
+       }
+     CONFIG
+
+     sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - [id1 foo=\"bar\"][id2 baz=\"something\"] Hello, syslog." do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "191"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+       insist { subject["syslog5424_host"] } == "paxton.local"
+       insist { subject["syslog5424_app"] } == "grokdebug"
+       insist { subject["syslog5424_proc"] } == "4123"
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == "[id1 foo=\"bar\"][id2 baz=\"something\"]"
+       insist { subject["syslog5424_msg"] } == "Hello, syslog."
+     end
+
+     sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug - - [id1 foo=\"bar\"] No process ID." do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "191"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+       insist { subject["syslog5424_host"] } == "paxton.local"
+       insist { subject["syslog5424_app"] } == "grokdebug"
+       insist { subject["syslog5424_proc"] } == nil
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == "[id1 foo=\"bar\"]"
+       insist { subject["syslog5424_msg"] } == "No process ID."
+     end
+
+     sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - - No structured data." do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "191"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+       insist { subject["syslog5424_host"] } == "paxton.local"
+       insist { subject["syslog5424_app"] } == "grokdebug"
+       insist { subject["syslog5424_proc"] } == "4123"
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == nil
+       insist { subject["syslog5424_msg"] } == "No structured data."
+     end
+
+     sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug - - - No PID or SD." do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "191"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+       insist { subject["syslog5424_host"] } == "paxton.local"
+       insist { subject["syslog5424_app"] } == "grokdebug"
+       insist { subject["syslog5424_proc"] } == nil
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == nil
+       insist { subject["syslog5424_msg"] } == "No PID or SD."
+     end
+
+     sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - Missing structured data." do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "191"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+       insist { subject["syslog5424_host"] } == "paxton.local"
+       insist { subject["syslog5424_app"] } == "grokdebug"
+       insist { subject["syslog5424_proc"] } == "4123"
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == nil
+       insist { subject["syslog5424_msg"] } == "Missing structured data."
+     end
+
+     sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - - Additional spaces." do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "191"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+       insist { subject["syslog5424_host"] } == "paxton.local"
+       insist { subject["syslog5424_app"] } == "grokdebug"
+       insist { subject["syslog5424_proc"] } == "4123"
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == nil
+       insist { subject["syslog5424_msg"] } == "Additional spaces."
+     end
+
+     sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - Additional spaces and missing SD." do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "191"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+       insist { subject["syslog5424_host"] } == "paxton.local"
+       insist { subject["syslog5424_app"] } == "grokdebug"
+       insist { subject["syslog5424_proc"] } == "4123"
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == nil
+       insist { subject["syslog5424_msg"] } == "Additional spaces and missing SD."
+     end
+
+     sample "<30>1 2014-04-04T16:44:07+02:00 osctrl01 dnsmasq-dhcp 8048 - - Appname contains a dash" do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "30"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2014-04-04T16:44:07+02:00"
+       insist { subject["syslog5424_host"] } == "osctrl01"
+       insist { subject["syslog5424_app"] } == "dnsmasq-dhcp"
+       insist { subject["syslog5424_proc"] } == "8048"
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == nil
+       insist { subject["syslog5424_msg"] } == "Appname contains a dash"
+     end
+
+     sample "<30>1 2014-04-04T16:44:07+02:00 osctrl01 - 8048 - - Appname is nil" do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "30"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2014-04-04T16:44:07+02:00"
+       insist { subject["syslog5424_host"] } == "osctrl01"
+       insist { subject["syslog5424_app"] } == nil
+       insist { subject["syslog5424_proc"] } == "8048"
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == nil
+       insist { subject["syslog5424_msg"] } == "Appname is nil"
+     end
+   end
+
+   describe "parsing an event with multiple messages (array of strings)", :if => false do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "(?:hello|world) %{NUMBER}" }
+           named_captures_only => false
+         }
+       }
+     CONFIG
+
+     sample("message" => [ "hello 12345", "world 23456" ]) do
+       insist { subject["NUMBER"] } == [ "12345", "23456" ]
+     end
+   end
+
+   describe "coercing matched values" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{NUMBER:foo:int} %{NUMBER:bar:float}" }
+           singles => true
+         }
+       }
+     CONFIG
+
+     sample "400 454.33" do
+       insist { subject["foo"] } == 400
+       insist { subject["foo"] }.is_a?(Fixnum)
+       insist { subject["bar"] } == 454.33
+       insist { subject["bar"] }.is_a?(Float)
+     end
+   end
+
+   describe "in-line pattern definitions" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{FIZZLE=\\d+}" }
+           named_captures_only => false
+           singles => true
+         }
+       }
+     CONFIG
+
+     sample "hello 1234" do
+       insist { subject["FIZZLE"] } == "1234"
+     end
+   end
+
+   describe "processing selected fields" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{WORD:word}" }
+           match => { "examplefield" => "%{NUMBER:num}" }
+           break_on_match => false
+           singles => true
+         }
+       }
+     CONFIG
+
+     sample("message" => "hello world", "examplefield" => "12345") do
+       insist { subject["examplefield"] } == "12345"
+       insist { subject["word"] } == "hello"
+     end
+   end
+
+   describe "adding fields on match" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "matchme %{NUMBER:fancy}" }
+           singles => true
+           add_field => [ "new_field", "%{fancy}" ]
+         }
+       }
+     CONFIG
+
+     sample "matchme 1234" do
+       insist { subject["tags"] }.nil?
+       insist { subject["new_field"] } == "1234"
+     end
+
+     sample "this will not be matched" do
+       insist { subject["tags"] }.include?("_grokparsefailure")
+       reject { subject }.include?("new_field")
+     end
+   end
+
+   context "empty fields" do
+     describe "drop by default" do
+       config <<-CONFIG
+         filter {
+           grok {
+             match => { "message" => "1=%{WORD:foo1} *(2=%{WORD:foo2})?" }
+           }
+         }
+       CONFIG
+
+       sample "1=test" do
+         insist { subject["tags"] }.nil?
+         insist { subject }.include?("foo1")
+
+         # Since 'foo2' was not captured, it must not be present in the event.
+         reject { subject }.include?("foo2")
+       end
+     end
+
+     describe "keep if keep_empty_captures is true" do
+       config <<-CONFIG
+         filter {
+           grok {
+             match => { "message" => "1=%{WORD:foo1} *(2=%{WORD:foo2})?" }
+             keep_empty_captures => true
+           }
+         }
+       CONFIG
+
+       sample "1=test" do
+         insist { subject["tags"] }.nil?
+         # use .to_hash for this test, for now, because right now
+         # the Event.include? returns false for missing fields as well
+         # as for fields with nil values.
+         insist { subject.to_hash }.include?("foo2")
+         insist { subject.to_hash }.include?("foo2")
+       end
+     end
+   end
+
+   describe "when named_captures_only == false" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "Hello %{WORD}. %{WORD:foo}" }
+           named_captures_only => false
+           singles => true
+         }
+       }
+     CONFIG
+
+     sample "Hello World, yo!" do
+       insist { subject }.include?("WORD")
+       insist { subject["WORD"] } == "World"
+       insist { subject }.include?("foo")
+       insist { subject["foo"] } == "yo"
+     end
+   end
+
+   describe "using oniguruma named captures (?<name>regex)" do
+     context "plain regexp" do
+       config <<-'CONFIG'
+         filter {
+           grok {
+             singles => true
+             match => { "message" => "(?<foo>\w+)" }
+           }
+         }
+       CONFIG
+       sample "hello world" do
+         insist { subject["tags"] }.nil?
+         insist { subject["foo"] } == "hello"
+       end
+     end
+
+     context "grok patterns" do
+       config <<-'CONFIG'
+         filter {
+           grok {
+             singles => true
+             match => { "message" => "(?<timestamp>%{DATE_EU} %{TIME})" }
+           }
+         }
+       CONFIG
+
+       sample "fancy 12-12-12 12:12:12" do
+         insist { subject["tags"] }.nil?
+         insist { subject["timestamp"] } == "12-12-12 12:12:12"
+       end
+     end
+   end
+
+   describe "grok on integer types" do
+     config <<-'CONFIG'
+       filter {
+         grok {
+           match => { "status" => "^403$" }
+           add_tag => "four_oh_three"
+         }
+       }
+     CONFIG
+
+     sample("status" => 403) do
+       reject { subject["tags"] }.include?("_grokparsefailure")
+       insist { subject["tags"] }.include?("four_oh_three")
+     end
+   end
+
+   describe "grok on float types" do
+     config <<-'CONFIG'
+       filter {
+         grok {
+           match => { "version" => "^1.0$" }
+           add_tag => "one_point_oh"
+         }
+       }
+     CONFIG
+
+     sample("version" => 1.0) do
+       insist { subject["tags"] }.include?("one_point_oh")
+       insist { subject["tags"] }.include?("one_point_oh")
+     end
+   end
+
+   describe "grok on %{LOGLEVEL}" do
+     config <<-'CONFIG'
+       filter {
+         grok {
+           pattern => "%{LOGLEVEL:level}: error!"
+         }
+       }
+     CONFIG
+
+     log_level_names = %w(
+       trace Trace TRACE
+       debug Debug DEBUG
+       notice Notice NOTICE
+       info Info INFO
+       warn warning Warn Warning WARN WARNING
+       err error Err Error ERR ERROR
+       crit critical Crit Critical CRIT CRITICAL
+       fatal Fatal FATAL
+       severe Severe SEVERE
+       emerg emergency Emerg Emergency EMERG EMERGENCY
+     )
+     log_level_names.each do |level_name|
+       sample "#{level_name}: error!" do
+         insist { subject['level'] } == level_name
+       end
+     end
+   end
+
+   describe "tagging on failure" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "matchme %{NUMBER:fancy}" }
+           tag_on_failure => false
+         }
+       }
+     CONFIG
+
+     sample "matchme 1234" do
+       insist { subject["tags"] }.nil?
+     end
+
+     sample "this will not be matched" do
+       insist { subject["tags"] }.include?("false")
+     end
+   end
+
+   describe "captures named fields even if the whole text matches" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{DATE_EU:stimestamp}" }
+           singles => true
+         }
+       }
+     CONFIG
+
+     sample "11/01/01" do
+       insist { subject["stimestamp"] } == "11/01/01"
+     end
+   end
+
+   describe "allow dashes in capture names" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{WORD:foo-bar}" }
+           singles => true
+         }
+       }
+     CONFIG
+
+     sample "hello world" do
+       insist { subject["foo-bar"] } == "hello"
+     end
+   end
+
+   describe "performance test", :performance => true do
+     event_count = 100000
+     min_rate = 2000
+
+     max_duration = event_count / min_rate
+     input = "Nov 24 01:29:01 -0800"
+     config <<-CONFIG
+       input {
+         generator {
+           count => #{event_count}
+           message => "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]"
+         }
+       }
+       filter {
+         grok {
+           match => { "message" => "%{SYSLOGLINE}" }
+           singles => true
+           overwrite => [ "message" ]
+         }
+       }
+       output { null { } }
+     CONFIG
+
+     2.times do
+       start = Time.now
+       agent do
+         duration = (Time.now - start)
+         puts "filters/grok parse rate: #{"%02.0f/sec" % (event_count / duration)}, elapsed: #{duration}s"
+         insist { duration } < max_duration
+       end
+     end
+   end
+
+   describe "singles with duplicate-named fields" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{INT:foo}|%{WORD:foo}" }
+           singles => true
+         }
+       }
+     CONFIG
+
+     sample "hello world" do
+       insist { subject["foo"] }.is_a?(String)
+     end
+
+     sample "123 world" do
+       insist { subject["foo"] }.is_a?(String)
+     end
+   end
+
+   describe "break_on_match default should be true and first match should exit filter" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{INT:foo}"
+                      "somefield" => "%{INT:bar}" }
+         }
+       }
+     CONFIG
+
+     sample("message" => "hello world 123", "somefield" => "testme abc 999") do
+       insist { subject["foo"] } == "123"
+       insist { subject["bar"] }.nil?
+     end
+   end
+
+   describe "break_on_match when set to false should try all patterns" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{INT:foo}"
+                      "somefield" => "%{INT:bar}" }
+           break_on_match => false
+         }
+       }
+     CONFIG
+
+     sample("message" => "hello world 123", "somefield" => "testme abc 999") do
+       insist { subject["foo"] } == "123"
+       insist { subject["bar"] } == "999"
+     end
+   end
+
+   describe "LOGSTASH-1547 - break_on_match should work on fields with multiple patterns" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => ["%{GREEDYDATA:name1}beard", "tree%{GREEDYDATA:name2}"] }
+           break_on_match => false
+         }
+       }
+     CONFIG
+
+     sample "treebranch" do
+       insist { subject["name2"] } == "branch"
+     end
+
+     sample "bushbeard" do
+       insist { subject["name1"] } == "bush"
+     end
+
+     sample "treebeard" do
+       insist { subject["name1"] } == "tree"
+       insist { subject["name2"] } == "beard"
+     end
+   end
+
+   describe "break_on_match default for array input with single grok pattern" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{INT:foo}" }
+         }
+       }
+     CONFIG
+
+     # array input -- both entries match
+     sample("message" => ["hello world 123", "line 23"]) do
+       insist { subject["foo"] } == ["123", "23"]
+       insist { subject["tags"] }.nil?
+     end
+
+     # array input, one of them matches
+     sample("message" => ["hello world 123", "abc"]) do
+       insist { subject["foo"] } == "123"
+       insist { subject["tags"] }.nil?
+     end
+   end
+
+   describe "break_on_match = true (default) for array input with multiple grok pattern" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => ["%{INT:foo}", "%{WORD:bar}"] }
+         }
+       }
+     CONFIG
+
+     # array input -- both entries match
+     sample("message" => ["hello world 123", "line 23"]) do
+       insist { subject["foo"] } == ["123", "23"]
+       insist { subject["bar"] }.nil?
+       insist { subject["tags"] }.nil?
+     end
+
+     # array input, one of them matches
+     sample("message" => ["hello world", "line 23"]) do
+       insist { subject["bar"] } == "hello"
+       insist { subject["foo"] } == "23"
+       insist { subject["tags"] }.nil?
+     end
+   end
+
+   describe "break_on_match = false for array input with multiple grok pattern" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => ["%{INT:foo}", "%{WORD:bar}"] }
+           break_on_match => false
+         }
+       }
+     CONFIG
+
+     # array input -- both entries match
+     sample("message" => ["hello world 123", "line 23"]) do
+       insist { subject["foo"] } == ["123", "23"]
+       insist { subject["bar"] } == ["hello", "line"]
+       insist { subject["tags"] }.nil?
+     end
+
+     # array input, one of them matches
+     sample("message" => ["hello world", "line 23"]) do
+       insist { subject["bar"] } == ["hello", "line"]
+       insist { subject["foo"] } == "23"
+       insist { subject["tags"] }.nil?
+     end
+   end
+
+   describe "grok with unicode" do
+     config <<-CONFIG
+       filter {
+         grok {
+           #pattern => "<%{POSINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}"
+           pattern => "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(:?)(?:\\[%{GREEDYDATA:syslog_pid}\\])?(:?) %{GREEDYDATA:syslog_message}"
+         }
+       }
+     CONFIG
+
+     sample "<22>Jan 4 07:50:46 mailmaster postfix/policy-spf[9454]: : SPF permerror (Junk encountered in record 'v=spf1 mx a:mail.domain.no ip4:192.168.0.4 �all'): Envelope-from: email@domain.no" do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog_pri"] } == "22"
+       insist { subject["syslog_program"] } == "postfix/policy-spf"
+     end
+   end
+
+
+ end
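As a usage note, this spec is written against the logstash 1.4-era test helpers (spec_helper, config, sample, insist), so running it assumes a development setup where those helpers are on the load path, roughly:

    bundle install
    bundle exec rspec spec/filters/grok_spec.rb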
metadata ADDED
@@ -0,0 +1,104 @@
+ --- !ruby/object:Gem::Specification
+ name: logstash-filter-grok
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Elasticsearch
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-11-05 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: logstash
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+ - !ruby/object:Gem::Dependency
+   name: jls-grok
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '='
+       - !ruby/object:Gem::Version
+         version: 0.11.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '='
+       - !ruby/object:Gem::Version
+         version: 0.11.0
+ - !ruby/object:Gem::Dependency
+   name: logstash-patterns-core
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: Grok is currently the best way in logstash to parse crappy unstructured
+   log data into something structured and queryable.
+ email: richard.pijnenburg@elasticsearch.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - LICENSE
+ - Rakefile
+ - lib/logstash/filters/grok.rb
+ - logstash-filter-grok.gemspec
+ - rakelib/publish.rake
+ - rakelib/vendor.rake
+ - spec/filters/grok_spec.rb
+ homepage: http://logstash.net/
+ licenses:
+ - Apache License (2.0)
+ metadata:
+   logstash_plugin: 'true'
+   group: filter
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.4.1
+ signing_key:
+ specification_version: 4
+ summary: Parse arbitrary text and structure it.
+ test_files:
+ - spec/filters/grok_spec.rb