logstash-filter-grok 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
+ ---
+ !binary "U0hBMQ==":
+   metadata.gz: !binary |-
+     ZDZkNTcxMWY2ZWE0ZGMyYTczNGYzYzRjNDdmMDI4NzE5N2IwN2Q5Mg==
+   data.tar.gz: !binary |-
+     YTJlZDBhZDg2ODViMzNkZjNhMjZmZDc2OTQ2MTFlYTM1MTgyOGNiNA==
+ SHA512:
+   metadata.gz: !binary |-
+     ZDJlZTRlYzBmYmFjYmRhNzA1OGE5ZTljN2ZkZGNkNzQ0ZTlhNDI0MzNmMTcx
+     NjFlNjU5MDgwYjI4ZTY3MTkzMWM5ODFmMjAyZGFlMWYzZTI3YjhjMWU0OGNh
+     NTFjYzg2NjRiODNmOWM1NTY0ZGJhMzRlZTdkY2QzN2ZlYjU0OTM=
+   data.tar.gz: !binary |-
+     ZDk4MGU3MzIzNGJkYTk3OGRhYjhiNjIyNTYzYzg1NGU2YzU3ZDQzNGNiZWMw
+     NThhNTBhMDczNmQ0OTM1NTIyYTRmZjkzZTFmNTcxYzliMWVmM2JiNTc2MTVl
+     YzhiMDNjM2RlNTI2MjU0OTdmZmE5NzljYmM0NTRhMjg1YmFiYjY=
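These digests are Base64-encoded hex strings (the SHA1 value above decodes to "d6d5711f6ea4dc2a734f3c4c47f0287197b07d92"). A minimal Ruby sketch of verifying one locally, assuming the gem's archive members have been extracted to the current directory (the file path is illustrative):

    require "base64"
    require "digest"

    # Decode the expected digest from checksums.yaml, then hash the file.
    expected = Base64.decode64("ZDZkNTcxMWY2ZWE0ZGMyYTczNGYzYzRjNDdmMDI4NzE5N2IwN2Q5Mg==")
    actual   = Digest::SHA1.file("metadata.gz").hexdigest # path is illustrative
    puts(expected == actual ? "checksum OK" : "checksum MISMATCH")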
data/.gitignore ADDED
@@ -0,0 +1,4 @@
+ *.gem
+ Gemfile.lock
+ .bundle
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,3 @@
+ source 'http://rubygems.org'
+ gem 'rake'
+ gem 'gem_publisher'
data/LICENSE ADDED
@@ -0,0 +1,13 @@
+ Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ @files=[]
+
+ task :default do
+   system("rake -T")
+ end
+
data/lib/logstash/filters/grok.rb ADDED
@@ -0,0 +1,363 @@
+ # encoding: utf-8
+ require "logstash/filters/base"
+ require "logstash/namespace"
+ require "logstash/environment"
+ require "logstash/patterns/core"
+ require "set"
+
+ # Parse arbitrary text and structure it.
+ #
+ # Grok is currently the best way in logstash to parse crappy unstructured log
+ # data into something structured and queryable.
+ #
+ # This tool is perfect for syslog logs, apache and other webserver logs, mysql
+ # logs, and in general, any log format that is generally written for humans
+ # and not computer consumption.
+ #
+ # Logstash ships with about 120 patterns by default. You can find them here:
+ # <https://github.com/logstash/logstash/tree/v%VERSION%/patterns>. You can add
+ # your own trivially. (See the patterns_dir setting)
+ #
+ # If you need help building patterns to match your logs, you will find the
+ # <http://grokdebug.herokuapp.com> tool quite useful!
+ #
+ # #### Grok Basics
+ #
+ # Grok works by combining text patterns into something that matches your
+ # logs.
+ #
+ # The syntax for a grok pattern is `%{SYNTAX:SEMANTIC}`
+ #
+ # The `SYNTAX` is the name of the pattern that will match your text. For
+ # example, "3.44" will be matched by the NUMBER pattern and "55.3.244.1" will
+ # be matched by the IP pattern. The syntax is how you match.
+ #
+ # The `SEMANTIC` is the identifier you give to the piece of text being matched.
+ # For example, "3.44" could be the duration of an event, so you could call it
+ # simply 'duration'. Further, a string "55.3.244.1" might identify the 'client'
+ # making a request.
+ #
+ # For the above example, your grok filter would look something like this:
+ #
+ #     %{NUMBER:duration} %{IP:client}
+ #
+ # Optionally you can add a data type conversion to your grok pattern. By default
+ # all semantics are saved as strings. If you wish to convert a semantic's data type,
+ # for example to change a string to an integer, suffix it with the target data type:
+ # for example, `%{NUMBER:num:int}` converts the 'num' semantic from a string to an
+ # integer. Currently the only supported conversions are `int` and `float`.
+ #
+ # #### Example
+ #
+ # With that idea of a syntax and semantic, we can pull out useful fields from a
+ # sample log like this fictional http request log:
+ #
+ #     55.3.244.1 GET /index.html 15824 0.043
+ #
+ # The pattern for this could be:
+ #
+ #     %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
+ #
+ # For a more realistic example, let's read these logs from a file:
+ #
+ #     input {
+ #       file {
+ #         path => "/var/log/http.log"
+ #       }
+ #     }
+ #     filter {
+ #       grok {
+ #         match => { "message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" }
+ #       }
+ #     }
+ #
+ # After the grok filter, the event will have a few extra fields in it:
+ #
+ # * client: 55.3.244.1
+ # * method: GET
+ # * request: /index.html
+ # * bytes: 15824
+ # * duration: 0.043
+ #
+ # #### Regular Expressions
+ #
+ # Grok sits on top of regular expressions, so any regular expressions are valid
+ # in grok as well. The regular expression library is Oniguruma, and you can see
+ # the full supported regexp syntax [on the Oniguruma
+ # site](http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt).
+ #
+ # #### Custom Patterns
+ #
+ # Sometimes logstash doesn't have a pattern you need. For this, you have
+ # a few options.
+ #
+ # First, you can use the Oniguruma syntax for 'named capture' which will
+ # let you match a piece of text and save it as a field:
+ #
+ #     (?<field_name>the pattern here)
+ #
+ # For example, postfix logs have a 'queue id' that is a 10- or 11-character
+ # hexadecimal value. I can capture that easily like this:
+ #
+ #     (?<queue_id>[0-9A-F]{10,11})
+ #
+ # Alternately, you can create a custom patterns file.
+ #
+ # * Create a directory called `patterns` with a file in it called `extra`
+ #   (the file name doesn't matter, but name it meaningfully for yourself)
+ # * In that file, write the pattern you need as the pattern name, a space, then
+ #   the regexp for that pattern.
+ #
+ # For example, doing the postfix queue id example as above:
+ #
+ #     # contents of ./patterns/postfix:
+ #     POSTFIX_QUEUEID [0-9A-F]{10,11}
+ #
+ # Then use the `patterns_dir` setting in this plugin to tell logstash where
+ # your custom patterns directory is. Here's a full example with a sample log:
+ #
+ #     Jan 1 06:25:43 mailserver14 postfix/cleanup[21403]: BEF25A72965: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
+ #
+ #     filter {
+ #       grok {
+ #         patterns_dir => "./patterns"
+ #         match => { "message" => "%{SYSLOGBASE} %{POSTFIX_QUEUEID:queue_id}: %{GREEDYDATA:syslog_message}" }
+ #       }
+ #     }
+ #
+ # The above will match and result in the following fields:
+ #
+ # * timestamp: Jan 1 06:25:43
+ # * logsource: mailserver14
+ # * program: postfix/cleanup
+ # * pid: 21403
+ # * queue_id: BEF25A72965
+ # * syslog_message: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
+ #
+ # The `timestamp`, `logsource`, `program`, and `pid` fields come from the
+ # SYSLOGBASE pattern which itself is defined by other patterns.
+ class LogStash::Filters::Grok < LogStash::Filters::Base
+   config_name "grok"
+   milestone 3
+
+   # Specify a pattern to parse with. This will match the 'message' field.
+   #
+   # If you want to match other fields than message, use the 'match' setting.
+   # Multiple patterns are fine.
+   config :pattern, :validate => :array, :deprecated => "You should use this instead: match => { \"message\" => \"your pattern here\" }"
+
+   # A hash of matches of field => value
+   #
+   # For example:
+   #
+   #     filter {
+   #       grok { match => { "message" => "Duration: %{NUMBER:duration}" } }
+   #     }
+   #
+   # Alternatively, using the old array syntax:
+   #
+   #     filter {
+   #       grok { match => [ "message", "Duration: %{NUMBER:duration}" ] }
+   #     }
+   #
+   config :match, :validate => :hash, :default => {}
+
+   #
+   # logstash ships by default with a bunch of patterns, so you don't
+   # necessarily need to define this yourself unless you are adding additional
+   # patterns.
+   #
+   # Pattern files are plain text with format:
+   #
+   #     NAME PATTERN
+   #
+   # For example:
+   #
+   #     NUMBER \d+
+   config :patterns_dir, :validate => :array, :default => []
+
+   # Drop if matched. Note, this feature may not stay. It is preferable to combine
+   # grok + grep filters to do parsing + dropping.
+   config :drop_if_match, :validate => :boolean, :default => false
+
+   # Break on first match. The first successful match by grok will result in the
+   # filter being finished. If you want grok to try all patterns (maybe you are
+   # parsing different things), then set this to false.
+   config :break_on_match, :validate => :boolean, :default => true
+
+   # If true, only store named captures from grok.
+   config :named_captures_only, :validate => :boolean, :default => true
+
+   # If true, keep empty captures as event fields.
+   config :keep_empty_captures, :validate => :boolean, :default => false
+
+   # If true, make single-value fields simply that value, not an array
+   # containing that one value.
+   config :singles, :validate => :boolean, :default => true, :deprecated => "This behavior is the default now, you don't need to set it."
+
+   # Append values to the 'tags' field when there has been no
+   # successful match
+   config :tag_on_failure, :validate => :array, :default => ["_grokparsefailure"]
+
+   # The fields to overwrite.
+   #
+   # This allows you to overwrite a value in a field that already exists.
+   #
+   # For example, if you have a syslog line in the 'message' field, you can
+   # overwrite the 'message' field with part of the match like so:
+   #
+   #     filter {
+   #       grok {
+   #         match => { "message" => "%{SYSLOGBASE} %{DATA:message}" }
+   #         overwrite => [ "message" ]
+   #       }
+   #     }
+   #
+   # In this case, a line like "May 29 16:37:11 sadness logger: hello world"
+   # will be parsed and 'hello world' will overwrite the original message.
+   config :overwrite, :validate => :array, :default => []
+
+   # Detect if we are running from a jarfile, pick the right path.
+   @@patterns_path ||= Set.new
+   #@@patterns_path += [LogStash::Environment.pattern_path("*")]
+   @@patterns_path += [LogStash::Patterns::Core.path]
+
+   public
+   def initialize(params)
+     super(params)
+     @match["message"] ||= []
+     @match["message"] += @pattern if @pattern # the config 'pattern' value (array)
+     # a cache of capture name handler methods.
+     @handlers = {}
+   end
+
+   public
+   def register
+     require "grok-pure" # rubygem 'jls-grok'
+
+     @patternfiles = []
+
+     # Have @@patterns_path show first. Last-in pattern definitions win; this
+     # will let folks redefine built-in patterns at runtime.
+     @patterns_dir = @@patterns_path.to_a + @patterns_dir
+     @logger.info? and @logger.info("Grok patterns path", :patterns_dir => @patterns_dir)
+     @patterns_dir.each do |path|
+       if File.directory?(path)
+         path = File.join(path, "*")
+       end
+
+       Dir.glob(path).each do |file|
+         @logger.info? and @logger.info("Grok loading patterns from file", :path => file)
+         @patternfiles << file
+       end
+     end
+
+     @patterns = Hash.new { |h,k| h[k] = [] }
+
+     @logger.info? and @logger.info("Match data", :match => @match)
+
+     @match.each do |field, patterns|
+       patterns = [patterns] if patterns.is_a?(String)
+
+       @logger.info? and @logger.info("Grok compile", :field => field, :patterns => patterns)
+       patterns.each do |pattern|
+         @logger.debug? and @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern)
+         grok = Grok.new
+         grok.logger = @logger unless @logger.nil?
+         add_patterns_from_files(@patternfiles, grok)
+         grok.compile(pattern, @named_captures_only)
+         @patterns[field] << grok
+       end
+     end # @match.each
+   end # def register
+
+   public
+   def filter(event)
+     return unless filter?(event)
+
+     matched = false
+     done = false
+
+     @logger.debug? and @logger.debug("Running grok filter", :event => event)
+     @patterns.each do |field, groks|
+       if match(groks, field, event)
+         matched = true
+         break if @break_on_match
+       end
+       #break if done
+     end # @patterns.each
+
+     if matched
+       filter_matched(event)
+     else
+       # Tag this event if we can't parse it. We can use this later to
+       # reparse+reindex logs if we improve the patterns given.
+       @tag_on_failure.each do |tag|
+         event["tags"] ||= []
+         event["tags"] << tag unless event["tags"].include?(tag)
+       end
+     end
+
+     @logger.debug? and @logger.debug("Event now: ", :event => event)
+   end # def filter
+
+   private
+   def match(groks, field, event)
+     input = event[field]
+     if input.is_a?(Array)
+       success = false
+       input.each do |input|
+         success |= match_against_groks(groks, input, event)
+       end
+       return success
+     else
+       return match_against_groks(groks, input, event)
+     end
+   rescue StandardError => e
+     @logger.warn("Grok regexp threw exception", :exception => e.message)
+   end
+
+   private
+   def match_against_groks(groks, input, event)
+     matched = false
+     groks.each do |grok|
+       # Convert anything else to string (number, hash, etc)
+       matched = grok.match_and_capture(input.to_s) do |field, value|
+         matched = true
+         handle(field, value, event)
+       end
+       break if matched and @break_on_match
+     end
+     return matched
+   end
+
+   private
+   def handle(field, value, event)
+     return if (value.nil? || (value.is_a?(String) && value.empty?)) unless @keep_empty_captures
+
+     if @overwrite.include?(field)
+       event[field] = value
+     else
+       v = event[field]
+       if v.nil?
+         event[field] = value
+       elsif v.is_a?(Array)
+         event[field] << value
+       elsif v.is_a?(String)
+         # Promote to array since we aren't overwriting.
+         event[field] = [v, value]
+       end
+     end
+   end
+
+   private
+   def add_patterns_from_files(paths, grok)
+     paths.each do |path|
+       if !File.exists?(path)
+         raise "Grok pattern file does not exist: #{path}"
+       end
+       grok.add_patterns_from_file(path)
+     end
+   end # def add_patterns_from_files
+
+ end # class LogStash::Filters::Grok
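The doc comment above walks through the %{SYNTAX:SEMANTIC} idea, and the filter's register/filter methods show the underlying flow: one compiled Grok object per pattern, then match_and_capture per event. The same flow can be exercised standalone with the jls-grok gem this filter wraps. A minimal sketch, assuming jls-grok is installed and a core patterns file exists at the (illustrative) path shown:

    require "grok-pure" # rubygem 'jls-grok'

    grok = Grok.new
    # Load base pattern definitions (path is illustrative).
    grok.add_patterns_from_file("patterns/grok-patterns")
    # The second argument mirrors this filter's named_captures_only option.
    grok.compile("%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}", true)

    grok.match_and_capture("55.3.244.1 GET /index.html 15824 0.043") do |field, value|
      puts "#{field} => #{value}" # client => 55.3.244.1, method => GET, ...
    end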
data/logstash-filter-grok.gemspec ADDED
@@ -0,0 +1,29 @@
+ Gem::Specification.new do |s|
+
+   s.name          = 'logstash-filter-grok'
+   s.version       = '0.1.0'
+   s.licenses      = ['Apache License (2.0)']
+   s.summary       = "Parse arbitrary text and structure it."
+   s.description   = "Grok is currently the best way in logstash to parse crappy unstructured log data into something structured and queryable."
+   s.authors       = ["Elasticsearch"]
+   s.email         = 'richard.pijnenburg@elasticsearch.com'
+   s.homepage      = "http://logstash.net/"
+   s.require_paths = ["lib"]
+
+   # Files
+   s.files = `git ls-files`.split($\)
+
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { "logstash_plugin" => "true", "group" => "filter" }
+
+   # Gem dependencies
+   s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+
+   s.add_runtime_dependency 'jls-grok', ['0.11.0']
+   s.add_runtime_dependency 'logstash-patterns-core'
+
+ end
+
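The `logstash_plugin` metadata flag is, per the gemspec comment, how a gem identifies itself as a logstash plugin. A minimal sketch of reading that flag back out of a locally built gem (the .gem filename is an assumption):

    require "rubygems/package"

    # Read the specification out of a built gem file.
    spec = Gem::Package.new("logstash-filter-grok-0.1.0.gem").spec
    puts spec.metadata["logstash_plugin"] # => "true"
    puts spec.metadata["group"]           # => "filter"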
data/rakelib/publish.rake ADDED
@@ -0,0 +1,9 @@
+ require "gem_publisher"
+
+ desc "Publish gem to RubyGems.org"
+ task :publish_gem do |t|
+   gem_file = Dir.glob(File.expand_path('../*.gemspec', File.dirname(__FILE__))).first
+   gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
+   puts "Published #{gem}" if gem
+ end
+
data/rakelib/vendor.rake ADDED
@@ -0,0 +1,169 @@
+ require "net/http"
+ require "uri"
+ require "digest/sha1"
+
+ def vendor(*args)
+   return File.join("vendor", *args)
+ end
+
+ directory "vendor/" => ["vendor"] do |task, args|
+   mkdir task.name
+ end
+
+ def fetch(url, sha1, output)
+
+   puts "Downloading #{url}"
+   actual_sha1 = download(url, output)
+
+   if actual_sha1 != sha1
+     fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
+   end
+ end # def fetch
+
+ def file_fetch(url, sha1)
+   filename = File.basename( URI(url).path )
+   output = "vendor/#{filename}"
+   task output => [ "vendor/" ] do
+     begin
+       actual_sha1 = file_sha1(output)
+       if actual_sha1 != sha1
+         fetch(url, sha1, output)
+       end
+     rescue Errno::ENOENT
+       fetch(url, sha1, output)
+     end
+   end.invoke
+
+   return output
+ end
+
+ def file_sha1(path)
+   digest = Digest::SHA1.new
+   fd = File.new(path, "r")
+   while true
+     begin
+       digest << fd.sysread(16384)
+     rescue EOFError
+       break
+     end
+   end
+   return digest.hexdigest
+ ensure
+   fd.close if fd
+ end
+
+ def download(url, output)
+   uri = URI(url)
+   digest = Digest::SHA1.new
+   tmp = "#{output}.tmp"
+   Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
+     request = Net::HTTP::Get.new(uri.path)
+     http.request(request) do |response|
+       fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
+       size = (response["content-length"].to_i || -1).to_f
+       count = 0
+       File.open(tmp, "w") do |fd|
+         response.read_body do |chunk|
+           fd.write(chunk)
+           digest << chunk
+           if size > 0 && $stdout.tty?
+             count += chunk.bytesize
+             $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
+           end
+         end
+       end
+       $stdout.write("\r \r") if $stdout.tty?
+     end
+   end
+
+   File.rename(tmp, output)
+
+   return digest.hexdigest
+ rescue SocketError => e
+   puts "Failure while downloading #{url}: #{e}"
+   raise
+ ensure
+   File.unlink(tmp) if File.exist?(tmp)
+ end # def download
+
+ def untar(tarball, &block)
+   require "archive/tar/minitar"
+   tgz = Zlib::GzipReader.new(File.open(tarball))
+   # Pull out typesdb
+   tar = Archive::Tar::Minitar::Input.open(tgz)
+   tar.each do |entry|
+     path = block.call(entry)
+     next if path.nil?
+     parent = File.dirname(path)
+
+     mkdir_p parent unless File.directory?(parent)
+
+     # Skip this file if the output file is the same size
+     if entry.directory?
+       mkdir path unless File.directory?(path)
+     else
+       entry_mode = entry.instance_eval { @mode } & 0777
+       if File.exists?(path)
+         stat = File.stat(path)
+         # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
+         # expose headers in the entry.
+         entry_size = entry.instance_eval { @size }
+         # If file sizes are same, skip writing.
+         next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
+       end
+       puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
+       File.open(path, "w") do |fd|
+         # eof? check lets us skip empty files. Necessary because the API provided by
+         # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
+         # IO object. Something about empty files in this EntryStream causes
+         # IO.copy_stream to throw "can't convert nil into String" on JRuby
+         # TODO(sissel): File a bug about this.
+         while !entry.eof?
+           chunk = entry.read(16384)
+           fd.write(chunk)
+         end
+         #IO.copy_stream(entry, fd)
+       end
+       File.chmod(entry_mode, path)
+     end
+   end
+   tar.close
+   File.unlink(tarball) if File.file?(tarball)
+ end # def untar
+
+ def ungz(file)
+
+   outpath = file.gsub('.gz', '')
+   tgz = Zlib::GzipReader.new(File.open(file))
+   begin
+     File.open(outpath, "w") do |out|
+       IO::copy_stream(tgz, out)
+     end
+     File.unlink(file)
+   rescue
+     File.unlink(outpath) if File.file?(outpath)
+     raise
+   end
+   tgz.close
+ end
+
+ desc "Process any vendor files required for this plugin"
+ task "vendor" do |task, args|
+
+   @files.each do |file|
+     download = file_fetch(file['url'], file['sha1'])
+     if download =~ /.tar.gz/
+       prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
+       untar(download) do |entry|
+         if !file['files'].nil?
+           next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
+           out = entry.full_name.split("/").last
+         end
+         File.join('vendor', out)
+       end
+     elsif download =~ /.gz/
+       ungz(download)
+     end
+   end
+
+ end
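The `vendor` task above iterates `@files` (initialized to an empty array in the Rakefile), expecting hashes with 'url' and 'sha1' keys plus an optional 'files' whitelist for tarball entries. A hypothetical entry, purely to illustrate the expected shape; this plugin vendors nothing, so `@files` stays empty in practice:

    @files = [
      {
        'url'   => 'https://example.com/GeoLiteCity.dat.gz',   # illustrative URL
        'sha1'  => 'da39a3ee5e6b4b0d3255bfef95601890afd80709', # illustrative digest
        'files' => ['/GeoLiteCity.dat'],                       # archive entries to keep (tarballs only)
      }
    ]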
data/spec/filters/grok_spec.rb ADDED
@@ -0,0 +1,648 @@
+ # encoding: utf-8
+ require "spec_helper"
+ require "logstash/filters/grok"
+
+ describe LogStash::Filters::Grok do
+
+   describe "simple syslog line" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{SYSLOGLINE}" }
+           singles => true
+           overwrite => [ "message" ]
+         }
+       }
+     CONFIG
+
+     sample "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]" do
+       insist { subject["tags"] }.nil?
+       insist { subject["logsource"] } == "evita"
+       insist { subject["timestamp"] } == "Mar 16 00:01:25"
+       insist { subject["message"] } == "connect from camomile.cloud9.net[168.100.1.3]"
+       insist { subject["program"] } == "postfix/smtpd"
+       insist { subject["pid"] } == "1713"
+     end
+   end
+
+   describe "ietf 5424 syslog line" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{SYSLOG5424LINE}" }
+           singles => true
+         }
+       }
+     CONFIG
+
+     sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - [id1 foo=\"bar\"][id2 baz=\"something\"] Hello, syslog." do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "191"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+       insist { subject["syslog5424_host"] } == "paxton.local"
+       insist { subject["syslog5424_app"] } == "grokdebug"
+       insist { subject["syslog5424_proc"] } == "4123"
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == "[id1 foo=\"bar\"][id2 baz=\"something\"]"
+       insist { subject["syslog5424_msg"] } == "Hello, syslog."
+     end
+
+     sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug - - [id1 foo=\"bar\"] No process ID." do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "191"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+       insist { subject["syslog5424_host"] } == "paxton.local"
+       insist { subject["syslog5424_app"] } == "grokdebug"
+       insist { subject["syslog5424_proc"] } == nil
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == "[id1 foo=\"bar\"]"
+       insist { subject["syslog5424_msg"] } == "No process ID."
+     end
+
+     sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - - No structured data." do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "191"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+       insist { subject["syslog5424_host"] } == "paxton.local"
+       insist { subject["syslog5424_app"] } == "grokdebug"
+       insist { subject["syslog5424_proc"] } == "4123"
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == nil
+       insist { subject["syslog5424_msg"] } == "No structured data."
+     end
+
+     sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug - - - No PID or SD." do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "191"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+       insist { subject["syslog5424_host"] } == "paxton.local"
+       insist { subject["syslog5424_app"] } == "grokdebug"
+       insist { subject["syslog5424_proc"] } == nil
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == nil
+       insist { subject["syslog5424_msg"] } == "No PID or SD."
+     end
+
+     sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - Missing structured data." do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "191"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+       insist { subject["syslog5424_host"] } == "paxton.local"
+       insist { subject["syslog5424_app"] } == "grokdebug"
+       insist { subject["syslog5424_proc"] } == "4123"
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == nil
+       insist { subject["syslog5424_msg"] } == "Missing structured data."
+     end
+
+     sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - - Additional spaces." do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "191"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+       insist { subject["syslog5424_host"] } == "paxton.local"
+       insist { subject["syslog5424_app"] } == "grokdebug"
+       insist { subject["syslog5424_proc"] } == "4123"
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == nil
+       insist { subject["syslog5424_msg"] } == "Additional spaces."
+     end
+
+     sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - Additional spaces and missing SD." do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "191"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2009-06-30T18:30:00+02:00"
+       insist { subject["syslog5424_host"] } == "paxton.local"
+       insist { subject["syslog5424_app"] } == "grokdebug"
+       insist { subject["syslog5424_proc"] } == "4123"
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == nil
+       insist { subject["syslog5424_msg"] } == "Additional spaces and missing SD."
+     end
+
+     sample "<30>1 2014-04-04T16:44:07+02:00 osctrl01 dnsmasq-dhcp 8048 - - Appname contains a dash" do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "30"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2014-04-04T16:44:07+02:00"
+       insist { subject["syslog5424_host"] } == "osctrl01"
+       insist { subject["syslog5424_app"] } == "dnsmasq-dhcp"
+       insist { subject["syslog5424_proc"] } == "8048"
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == nil
+       insist { subject["syslog5424_msg"] } == "Appname contains a dash"
+     end
+
+     sample "<30>1 2014-04-04T16:44:07+02:00 osctrl01 - 8048 - - Appname is nil" do
+       insist { subject["tags"] }.nil?
+       insist { subject["syslog5424_pri"] } == "30"
+       insist { subject["syslog5424_ver"] } == "1"
+       insist { subject["syslog5424_ts"] } == "2014-04-04T16:44:07+02:00"
+       insist { subject["syslog5424_host"] } == "osctrl01"
+       insist { subject["syslog5424_app"] } == nil
+       insist { subject["syslog5424_proc"] } == "8048"
+       insist { subject["syslog5424_msgid"] } == nil
+       insist { subject["syslog5424_sd"] } == nil
+       insist { subject["syslog5424_msg"] } == "Appname is nil"
+     end
+   end
+
+   describe "parsing an event with multiple messages (array of strings)", :if => false do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "(?:hello|world) %{NUMBER}" }
+           named_captures_only => false
+         }
+       }
+     CONFIG
+
+     sample("message" => [ "hello 12345", "world 23456" ]) do
+       insist { subject["NUMBER"] } == [ "12345", "23456" ]
+     end
+   end
+
+   describe "coercing matched values" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{NUMBER:foo:int} %{NUMBER:bar:float}" }
+           singles => true
+         }
+       }
+     CONFIG
+
+     sample "400 454.33" do
+       insist { subject["foo"] } == 400
+       insist { subject["foo"] }.is_a?(Fixnum)
+       insist { subject["bar"] } == 454.33
+       insist { subject["bar"] }.is_a?(Float)
+     end
+   end
+
+   describe "in-line pattern definitions" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{FIZZLE=\\d+}" }
+           named_captures_only => false
+           singles => true
+         }
+       }
+     CONFIG
+
+     sample "hello 1234" do
+       insist { subject["FIZZLE"] } == "1234"
+     end
+   end
+
+   describe "processing selected fields" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "%{WORD:word}" }
+           match => { "examplefield" => "%{NUMBER:num}" }
+           break_on_match => false
+           singles => true
+         }
+       }
+     CONFIG
+
+     sample("message" => "hello world", "examplefield" => "12345") do
+       insist { subject["examplefield"] } == "12345"
+       insist { subject["word"] } == "hello"
+     end
+   end
+
+   describe "adding fields on match" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "matchme %{NUMBER:fancy}" }
+           singles => true
+           add_field => [ "new_field", "%{fancy}" ]
+         }
+       }
+     CONFIG
+
+     sample "matchme 1234" do
+       insist { subject["tags"] }.nil?
+       insist { subject["new_field"] } == "1234"
+     end
+
+     sample "this will not be matched" do
+       insist { subject["tags"] }.include?("_grokparsefailure")
+       reject { subject }.include?("new_field")
+     end
+   end
+
+   context "empty fields" do
+     describe "drop by default" do
+       config <<-CONFIG
+         filter {
+           grok {
+             match => { "message" => "1=%{WORD:foo1} *(2=%{WORD:foo2})?" }
+           }
+         }
+       CONFIG
+
+       sample "1=test" do
+         insist { subject["tags"] }.nil?
+         insist { subject }.include?("foo1")
+
+         # Since 'foo2' was not captured, it must not be present in the event.
+         reject { subject }.include?("foo2")
+       end
+     end
+
+     describe "keep if keep_empty_captures is true" do
+       config <<-CONFIG
+         filter {
+           grok {
+             match => { "message" => "1=%{WORD:foo1} *(2=%{WORD:foo2})?" }
+             keep_empty_captures => true
+           }
+         }
+       CONFIG
+
+       sample "1=test" do
+         insist { subject["tags"] }.nil?
+         # use .to_hash for this test, for now, because right now
+         # the Event.include? returns false for missing fields as well
+         # as for fields with nil values.
+         insist { subject.to_hash }.include?("foo2")
+         insist { subject.to_hash }.include?("foo2")
+       end
+     end
+   end
+
+   describe "when named_captures_only == false" do
+     config <<-CONFIG
+       filter {
+         grok {
+           match => { "message" => "Hello %{WORD}. %{WORD:foo}" }
+           named_captures_only => false
+           singles => true
+         }
+       }
+     CONFIG
+
+     sample "Hello World, yo!" do
+       insist { subject }.include?("WORD")
+       insist { subject["WORD"] } == "World"
+       insist { subject }.include?("foo")
+       insist { subject["foo"] } == "yo"
+     end
+   end
+
+   describe "using oniguruma named captures (?<name>regex)" do
+     context "plain regexp" do
+       config <<-'CONFIG'
+         filter {
+           grok {
+             singles => true
+             match => { "message" => "(?<foo>\w+)" }
+           }
+         }
+       CONFIG
+       sample "hello world" do
+         insist { subject["tags"] }.nil?
+         insist { subject["foo"] } == "hello"
+       end
+     end
+
+     context "grok patterns" do
+       config <<-'CONFIG'
+         filter {
+           grok {
+             singles => true
+             match => { "message" => "(?<timestamp>%{DATE_EU} %{TIME})" }
+           }
+         }
+       CONFIG
+
+       sample "fancy 12-12-12 12:12:12" do
+         insist { subject["tags"] }.nil?
+         insist { subject["timestamp"] } == "12-12-12 12:12:12"
+       end
+     end
+   end
+
+   describe "grok on integer types" do
+     config <<-'CONFIG'
+       filter {
+         grok {
+           match => { "status" => "^403$" }
+           add_tag => "four_oh_three"
+         }
+       }
+     CONFIG
+
+     sample("status" => 403) do
+       reject { subject["tags"] }.include?("_grokparsefailure")
+       insist { subject["tags"] }.include?("four_oh_three")
+     end
+   end
+
+   describe "grok on float types" do
+     config <<-'CONFIG'
+       filter {
+         grok {
+           match => { "version" => "^1.0$" }
+           add_tag => "one_point_oh"
+         }
+       }
+     CONFIG
+
+     sample("version" => 1.0) do
+       insist { subject["tags"] }.include?("one_point_oh")
+       insist { subject["tags"] }.include?("one_point_oh")
+     end
+   end
+
+   describe "grok on %{LOGLEVEL}" do
+     config <<-'CONFIG'
+       filter {
+         grok {
+           pattern => "%{LOGLEVEL:level}: error!"
+         }
+       }
+     CONFIG
+
382
+ log_level_names = %w(
383
+ trace Trace TRACE
384
+ debug Debug DEBUG
385
+ notice Notice Notice
386
+ info Info INFO
387
+ warn warning Warn Warning WARN WARNING
388
+ err error Err Error ERR ERROR
389
+ crit critical Crit Critical CRIT CRITICAL
390
+ fatal Fatal FATAL
391
+ severe Severe SEVERE
392
+ emerg emergency Emerg Emergency EMERG EMERGENCY
393
+ )
394
+ log_level_names.each do |level_name|
395
+ sample "#{level_name}: error!" do
396
+ insist { subject['level'] } == level_name
397
+ end
398
+ end
399
+ end
400
+
401
+ describe "tagging on failure" do
402
+ config <<-CONFIG
403
+ filter {
404
+ grok {
405
+ match => { "message" => "matchme %{NUMBER:fancy}" }
406
+ tag_on_failure => false
407
+ }
408
+ }
409
+ CONFIG
410
+
411
+ sample "matchme 1234" do
412
+ insist { subject["tags"] }.nil?
413
+ end
414
+
415
+ sample "this will not be matched" do
416
+ insist { subject["tags"] }.include?("false")
417
+ end
418
+ end
419
+
420
+ describe "captures named fields even if the whole text matches" do
421
+ config <<-CONFIG
422
+ filter {
423
+ grok {
424
+ match => { "message" => "%{DATE_EU:stimestamp}" }
425
+ singles => true
426
+ }
427
+ }
428
+ CONFIG
429
+
430
+ sample "11/01/01" do
431
+ insist { subject["stimestamp"] } == "11/01/01"
432
+ end
433
+ end
434
+
435
+ describe "allow dashes in capture names" do
436
+ config <<-CONFIG
437
+ filter {
438
+ grok {
439
+ match => { "message" => "%{WORD:foo-bar}" }
440
+ singles => true
441
+ }
442
+ }
443
+ CONFIG
444
+
445
+ sample "hello world" do
446
+ insist { subject["foo-bar"] } == "hello"
447
+ end
448
+ end
449
+
450
+ describe "performance test", :performance => true do
451
+ event_count = 100000
452
+ min_rate = 2000
453
+
454
+ max_duration = event_count / min_rate
455
+ input = "Nov 24 01:29:01 -0800"
456
+ config <<-CONFIG
457
+ input {
458
+ generator {
459
+ count => #{event_count}
460
+ message => "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]"
461
+ }
462
+ }
463
+ filter {
464
+ grok {
465
+ match => { "message" => "%{SYSLOGLINE}" }
466
+ singles => true
467
+ overwrite => [ "message" ]
468
+ }
469
+ }
470
+ output { null { } }
471
+ CONFIG
472
+
473
+ 2.times do
474
+ start = Time.now
475
+ agent do
476
+ duration = (Time.now - start)
477
+ puts "filters/grok parse rate: #{"%02.0f/sec" % (event_count / duration)}, elapsed: #{duration}s"
478
+ insist { duration } < max_duration
479
+ end
480
+ end
481
+ end
482
+
483
+ describe "singles with duplicate-named fields" do
484
+ config <<-CONFIG
485
+ filter {
486
+ grok {
487
+ match => { "message" => "%{INT:foo}|%{WORD:foo}" }
488
+ singles => true
489
+ }
490
+ }
491
+ CONFIG
492
+
493
+ sample "hello world" do
494
+ insist { subject["foo"] }.is_a?(String)
495
+ end
496
+
497
+ sample "123 world" do
498
+ insist { subject["foo"] }.is_a?(String)
499
+ end
500
+ end
501
+
502
+ describe "break_on_match default should be true and first match should exit filter" do
503
+ config <<-CONFIG
504
+ filter {
505
+ grok {
506
+ match => { "message" => "%{INT:foo}"
507
+ "somefield" => "%{INT:bar}"}
508
+ }
509
+ }
510
+ CONFIG
511
+
512
+ sample("message" => "hello world 123", "somefield" => "testme abc 999") do
513
+ insist { subject["foo"] } == "123"
514
+ insist { subject["bar"] }.nil?
515
+ end
516
+ end
517
+
518
+ describe "break_on_match when set to false should try all patterns" do
519
+ config <<-CONFIG
520
+ filter {
521
+ grok {
522
+ match => { "message" => "%{INT:foo}"
523
+ "somefield" => "%{INT:bar}"}
524
+ break_on_match => false
525
+ }
526
+ }
527
+ CONFIG
528
+
529
+ sample("message" => "hello world 123", "somefield" => "testme abc 999") do
530
+ insist { subject["foo"] } == "123"
531
+ insist { subject["bar"] } == "999"
532
+ end
533
+ end
534
+
535
+ describe "LOGSTASH-1547 - break_on_match should work on fields with multiple patterns" do
536
+ config <<-CONFIG
537
+ filter {
538
+ grok {
539
+ match => { "message" => ["%{GREEDYDATA:name1}beard", "tree%{GREEDYDATA:name2}"] }
540
+ break_on_match => false
541
+ }
542
+ }
543
+ CONFIG
544
+
545
+ sample "treebranch" do
546
+ insist { subject["name2"] } == "branch"
547
+ end
548
+
549
+ sample "bushbeard" do
550
+ insist { subject["name1"] } == "bush"
551
+ end
552
+
553
+ sample "treebeard" do
554
+ insist { subject["name1"] } == "tree"
555
+ insist { subject["name2"] } == "beard"
556
+ end
557
+ end
558
+
559
+ describe "break_on_match default for array input with single grok pattern" do
560
+ config <<-CONFIG
561
+ filter {
562
+ grok {
563
+ match => { "message" => "%{INT:foo}"}
564
+ }
565
+ }
566
+ CONFIG
567
+
568
+ # array input --
569
+ sample("message" => ["hello world 123", "line 23"]) do
570
+ insist { subject["foo"] } == ["123", "23"]
571
+ insist { subject["tags"] }.nil?
572
+ end
573
+
574
+ # array input, one of them matches
575
+ sample("message" => ["hello world 123", "abc"]) do
576
+ insist { subject["foo"] } == "123"
577
+ insist { subject["tags"] }.nil?
578
+ end
579
+ end
580
+
581
+ describe "break_on_match = true (default) for array input with multiple grok pattern" do
582
+ config <<-CONFIG
583
+ filter {
584
+ grok {
585
+ match => { "message" => ["%{INT:foo}", "%{WORD:bar}"] }
586
+ }
587
+ }
588
+ CONFIG
589
+
590
+ # array input --
591
+ sample("message" => ["hello world 123", "line 23"]) do
592
+ insist { subject["foo"] } == ["123", "23"]
593
+ insist { subject["bar"] }.nil?
594
+ insist { subject["tags"] }.nil?
595
+ end
596
+
597
+ # array input, one of them matches
598
+ sample("message" => ["hello world", "line 23"]) do
599
+ insist { subject["bar"] } == "hello"
600
+ insist { subject["foo"] } == "23"
601
+ insist { subject["tags"] }.nil?
602
+ end
603
+ end
604
+
605
+ describe "break_on_match = false for array input with multiple grok pattern" do
606
+ config <<-CONFIG
607
+ filter {
608
+ grok {
609
+ match => { "message" => ["%{INT:foo}", "%{WORD:bar}"] }
610
+ break_on_match => false
611
+ }
612
+ }
613
+ CONFIG
614
+
615
+ # array input --
616
+ sample("message" => ["hello world 123", "line 23"]) do
617
+ insist { subject["foo"] } == ["123", "23"]
618
+ insist { subject["bar"] } == ["hello", "line"]
619
+ insist { subject["tags"] }.nil?
620
+ end
621
+
622
+ # array input, one of them matches
623
+ sample("message" => ["hello world", "line 23"]) do
624
+ insist { subject["bar"] } == ["hello", "line"]
625
+ insist { subject["foo"] } == "23"
626
+ insist { subject["tags"] }.nil?
627
+ end
628
+ end
629
+
630
+ describe "grok with unicode" do
631
+ config <<-CONFIG
632
+ filter {
633
+ grok {
634
+ #pattern => "<%{POSINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}"
635
+ pattern => "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(:?)(?:\\[%{GREEDYDATA:syslog_pid}\\])?(:?) %{GREEDYDATA:syslog_message}"
636
+ }
637
+ }
638
+ CONFIG
639
+
640
+ sample "<22>Jan 4 07:50:46 mailmaster postfix/policy-spf[9454]: : SPF permerror (Junk encountered in record 'v=spf1 mx a:mail.domain.no ip4:192.168.0.4 �all'): Envelope-from: email@domain.no" do
641
+ insist { subject["tags"] }.nil?
642
+ insist { subject["syslog_pri"] } == "22"
643
+ insist { subject["syslog_program"] } == "postfix/policy-spf"
644
+ end
645
+ end
646
+
647
+
648
+ end
metadata ADDED
@@ -0,0 +1,104 @@
+ --- !ruby/object:Gem::Specification
+ name: logstash-filter-grok
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Elasticsearch
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-11-05 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: logstash
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+ - !ruby/object:Gem::Dependency
+   name: jls-grok
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '='
+       - !ruby/object:Gem::Version
+         version: 0.11.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '='
+       - !ruby/object:Gem::Version
+         version: 0.11.0
+ - !ruby/object:Gem::Dependency
+   name: logstash-patterns-core
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: Grok is currently the best way in logstash to parse crappy unstructured
+   log data into something structured and queryable.
+ email: richard.pijnenburg@elasticsearch.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - LICENSE
+ - Rakefile
+ - lib/logstash/filters/grok.rb
+ - logstash-filter-grok.gemspec
+ - rakelib/publish.rake
+ - rakelib/vendor.rake
+ - spec/filters/grok_spec.rb
+ homepage: http://logstash.net/
+ licenses:
+ - Apache License (2.0)
+ metadata:
+   logstash_plugin: 'true'
+   group: filter
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.4.1
+ signing_key:
+ specification_version: 4
+ summary: Parse arbitrary text and structure it.
+ test_files:
+ - spec/filters/grok_spec.rb