logstash-filter-multiline 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +4 -0
- data/Gemfile +3 -0
- data/LICENSE +13 -0
- data/Rakefile +6 -0
- data/lib/logstash/filters/multiline.rb +280 -0
- data/logstash-filter-multiline.gemspec +29 -0
- data/rakelib/publish.rake +9 -0
- data/rakelib/vendor.rake +169 -0
- data/spec/filters/multiline_spec.rb +153 -0
- metadata +119 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
Yjg5MGE4OTE3MmE0MzUwNWNkYmNjZDllYzYwNGJiNzFmNTJhZjBkOQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
YWZjYTY3MjIxMmMzOGI3OTE0N2ExMTk2NWI3ZjAyYTYxZWZjYWZlNA==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MDYyMzgwYTllZWY1NjM2ZDY2N2YwOTQwZjVmMWE1MGNjYzI4ODM0YjYwZWRi
|
10
|
+
ZDAzNGJhMjZiM2NkNzUyMGI4YTczNWMzMWM0YTI1NjRhY2RlM2EyZWZjNGI5
|
11
|
+
YTVkYmI0MWVmYzA3NGJkM2ZhMThhZDFlZTkxMzAwNGU0MjY5N2Y=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZjdhOGRjOTk5NTUzYTAxZjRiODZkN2VmYmFjNjUyYjA1NjZiM2U0N2I4YTg3
|
14
|
+
ZjEyZWI0YTQ2NTdlYzEzMzNjYjgwNjQxZjYyYTg1ZmNkNjRhYzc1M2NiNDI0
|
15
|
+
MDUwOTNiNzJlMjY1M2Q0Mzk0ZThjNmY1ZTJjZDNjYzc3OGI5MWE=
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/Rakefile
ADDED
@@ -0,0 +1,280 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/filters/base"
|
3
|
+
require "logstash/namespace"
|
4
|
+
require "logstash/environment"
|
5
|
+
require "logstash/patterns/core"
|
6
|
+
require "set"
|
7
|
+
#
|
8
|
+
# This filter will collapse multiline messages from a single source into one Logstash event.
|
9
|
+
#
|
10
|
+
# The original goal of this filter was to allow joining of multi-line messages
|
11
|
+
# from files into a single event. For example - joining java exception and
|
12
|
+
# stacktrace messages into a single event.
|
13
|
+
#
|
14
|
+
# NOTE: This filter will not work with multiple worker threads `-w 2` on the logstash command line.
|
15
|
+
#
|
16
|
+
# The config looks like this:
|
17
|
+
# [source,ruby]
|
18
|
+
# filter {
|
19
|
+
# multiline {
|
20
|
+
# type => "type"
|
21
|
+
# pattern => "pattern, a regexp"
|
22
|
+
# negate => boolean
|
23
|
+
# what => "previous" or "next"
|
24
|
+
# }
|
25
|
+
# }
|
26
|
+
#
|
27
|
+
# The `pattern` should be a regexp which matches what you believe to be an indicator
|
28
|
+
# that the field is part of an event consisting of multiple lines of log data.
|
29
|
+
#
|
30
|
+
# The `what` must be `previous` or `next` and indicates the relation
|
31
|
+
# to the multi-line event.
|
32
|
+
#
|
33
|
+
# The `negate` can be `true` or `false` (defaults to `false`). If `true`, a
|
34
|
+
# message not matching the pattern will constitute a match of the multiline
|
35
|
+
# filter and the `what` will be applied. (vice-versa is also true)
|
36
|
+
#
|
37
|
+
# For example, Java stack traces are multiline and usually have the message
|
38
|
+
# starting at the far-left, with each subsequent line indented. Do this:
|
39
|
+
# [source,ruby]
|
40
|
+
# filter {
|
41
|
+
# multiline {
|
42
|
+
# type => "somefiletype"
|
43
|
+
# pattern => "^\s"
|
44
|
+
# what => "previous"
|
45
|
+
# }
|
46
|
+
# }
|
47
|
+
#
|
48
|
+
# This says that any line starting with whitespace belongs to the previous line.
|
49
|
+
#
|
50
|
+
# Another example is C line continuations (backslash). Here's how to do that:
|
51
|
+
# [source,ruby]
|
52
|
+
# filter {
|
53
|
+
# multiline {
|
54
|
+
# type => "somefiletype "
|
55
|
+
# pattern => "\\$"
|
56
|
+
# what => "next"
|
57
|
+
# }
|
58
|
+
# }
|
59
|
+
#
|
60
|
+
# This says that any line ending with a backslash should be combined with the
|
61
|
+
# following line.
|
62
|
+
#
|
63
|
+
class LogStash::Filters::Multiline < LogStash::Filters::Base
|
64
|
+
|
65
|
+
config_name "multiline"
|
66
|
+
milestone 3
|
67
|
+
|
68
|
+
# The regular expression to match.
|
69
|
+
config :pattern, :validate => :string, :required => true
|
70
|
+
|
71
|
+
# If the pattern matched, does event belong to the next or previous event?
|
72
|
+
config :what, :validate => ["previous", "next"], :required => true
|
73
|
+
|
74
|
+
# Negate the regexp pattern ('if not matched')
|
75
|
+
config :negate, :validate => :boolean, :default => false
|
76
|
+
|
77
|
+
# The stream identity is how the multiline filter determines which stream an
|
78
|
+
# event belongs to. This is generally used for differentiating, say, events
|
79
|
+
# coming from multiple files in the same file input, or multiple connections
|
80
|
+
# coming from a tcp input.
|
81
|
+
#
|
82
|
+
# The default value here is usually what you want, but there are some cases
|
83
|
+
# where you want to change it. One such example is if you are using a tcp
|
84
|
+
# input with only one client connecting at any time. If that client
|
85
|
+
# reconnects (due to error or client restart), then logstash will identify
|
86
|
+
# the new connection as a new stream and break any multiline goodness that
|
87
|
+
# may have occurred between the old and new connection. To solve this use
|
88
|
+
# case, you can use `%{host}.%{type}` instead.
|
89
|
+
config :stream_identity , :validate => :string, :default => "%{host}.%{path}.%{type}"
|
90
|
+
|
91
|
+
# Logstash ships by default with a bunch of patterns, so you don't
|
92
|
+
# necessarily need to define this yourself unless you are adding additional
|
93
|
+
# patterns.
|
94
|
+
#
|
95
|
+
# Pattern files are plain text with format:
|
96
|
+
# [source,ruby]
|
97
|
+
# NAME PATTERN
|
98
|
+
#
|
99
|
+
# For example:
|
100
|
+
# [source,ruby]
|
101
|
+
# NUMBER \d+
|
102
|
+
config :patterns_dir, :validate => :array, :default => []
|
103
|
+
|
104
|
+
# The maximum age an event can be (in seconds) before it is automatically
|
105
|
+
# flushed.
|
106
|
+
config :max_age, :validate => :number, :default => 5
|
107
|
+
|
108
|
+
# Call the filter flush method at regular interval.
|
109
|
+
# Optional.
|
110
|
+
config :periodic_flush, :validate => :boolean, :default => true
|
111
|
+
|
112
|
+
|
113
|
+
# Detect if we are running from a jarfile, pick the right path.
|
114
|
+
@@patterns_path = Set.new
|
115
|
+
@@patterns_path += [LogStash::Patterns::Core.path]
|
116
|
+
|
117
|
+
MULTILINE_TAG = "multiline"
|
118
|
+
|
119
|
+
public
|
120
|
+
def initialize(config = {})
|
121
|
+
super
|
122
|
+
|
123
|
+
# this filter cannot be parallelized because message order
|
124
|
+
# cannot be guaranteed across threads, line #2 could be processed
|
125
|
+
# before line #1
|
126
|
+
@threadsafe = false
|
127
|
+
|
128
|
+
# this filter needs to keep state
|
129
|
+
@pending = Hash.new
|
130
|
+
end # def initialize
|
131
|
+
|
132
|
+
public
|
133
|
+
def register
|
134
|
+
require "grok-pure" # rubygem 'jls-grok'
|
135
|
+
|
136
|
+
@grok = Grok.new
|
137
|
+
|
138
|
+
@patterns_dir = @@patterns_path.to_a + @patterns_dir
|
139
|
+
@patterns_dir.each do |path|
|
140
|
+
if File.directory?(path)
|
141
|
+
path = File.join(path, "*")
|
142
|
+
end
|
143
|
+
|
144
|
+
Dir.glob(path).each do |file|
|
145
|
+
@logger.info("Grok loading patterns from file", :path => file)
|
146
|
+
@grok.add_patterns_from_file(file)
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
@grok.compile(@pattern)
|
151
|
+
|
152
|
+
case @what
|
153
|
+
when "previous"
|
154
|
+
class << self; alias_method :multiline_filter!, :previous_filter!; end
|
155
|
+
when "next"
|
156
|
+
class << self; alias_method :multiline_filter!, :next_filter!; end
|
157
|
+
else
|
158
|
+
# we should never get here since @what is validated at config
|
159
|
+
raise(ArgumentError, "Unknown multiline 'what' value")
|
160
|
+
end # case @what
|
161
|
+
|
162
|
+
@logger.debug("Registered multiline plugin", :type => @type, :config => @config)
|
163
|
+
end # def register
|
164
|
+
|
165
|
+
public
|
166
|
+
def filter(event)
|
167
|
+
return unless filter?(event)
|
168
|
+
|
169
|
+
match = event["message"].is_a?(Array) ? @grok.match(event["message"].first) : @grok.match(event["message"])
|
170
|
+
match = (match and !@negate) || (!match and @negate) # add negate option
|
171
|
+
|
172
|
+
@logger.debug? && @logger.debug("Multiline", :pattern => @pattern, :message => event["message"], :match => match, :negate => @negate)
|
173
|
+
|
174
|
+
multiline_filter!(event, match)
|
175
|
+
|
176
|
+
unless event.cancelled?
|
177
|
+
collapse_event!(event)
|
178
|
+
filter_matched(event) if match
|
179
|
+
end
|
180
|
+
end # def filter
|
181
|
+
|
182
|
+
# flush any pending messages
|
183
|
+
# called at regular interval without options and at pipeline shutdown with the :final => true option
|
184
|
+
# @param options [Hash]
|
185
|
+
# @option options [Boolean] :final => true to signal a final shutdown flush
|
186
|
+
# @return [Array<LogStash::Event>] list of flushed events
|
187
|
+
public
|
188
|
+
def flush(options = {})
  # Note that thread safety concerns are not necessary here because the
  # multiline filter is not thread safe, thus cannot be run in multiple
  # filterworker threads, and flushing is called by the same thread.

  # Read the clock once so every pending event is aged against the same instant.
  now = Time.now

  # Select all expired events from @pending; on a :final flush select everything.
  # A pending event may carry several timestamps (one per appended line);
  # its age is measured from the first one.
  expired = @pending.select do |_key, event|
    age = now - Array(event["@timestamp"]).first.time
    (age >= @max_age) || options[:final]
  end

  # Remove the expired entries from @pending so they are not flushed twice.
  expired.each { |key, _event| @pending.delete(key) }

  # Return the list of uncancelled and collapsed expired events.
  expired.map do |_key, event|
    event.uncancel
    collapse_event!(event)
  end
end # def flush
|
209
|
+
|
210
|
+
public
|
211
|
+
def teardown
|
212
|
+
# nothing to do
|
213
|
+
end
|
214
|
+
|
215
|
+
private
|
216
|
+
|
217
|
+
def previous_filter!(event, match)
|
218
|
+
key = event.sprintf(@stream_identity)
|
219
|
+
|
220
|
+
pending = @pending[key]
|
221
|
+
|
222
|
+
if match
|
223
|
+
event.tag(MULTILINE_TAG)
|
224
|
+
# this line is part of the previous (pending) event.
|
225
|
+
# append it to the event and cancel it
|
226
|
+
if pending
|
227
|
+
pending.append(event)
|
228
|
+
else
|
229
|
+
@pending[key] = event
|
230
|
+
end
|
231
|
+
event.cancel
|
232
|
+
else
|
233
|
+
# this line is not part of the previous event
|
234
|
+
# if we have a pending event, it's done, send it.
|
235
|
+
# put the current event into pending
|
236
|
+
if pending
|
237
|
+
tmp = event.to_hash
|
238
|
+
event.overwrite(pending)
|
239
|
+
@pending[key] = LogStash::Event.new(tmp)
|
240
|
+
else
|
241
|
+
@pending[key] = event
|
242
|
+
event.cancel
|
243
|
+
end
|
244
|
+
end # if match
|
245
|
+
end
|
246
|
+
|
247
|
+
def next_filter!(event, match)
|
248
|
+
key = event.sprintf(@stream_identity)
|
249
|
+
|
250
|
+
# look up the event currently buffered for this stream, if any
|
251
|
+
pending = @pending[key]
|
252
|
+
|
253
|
+
if match
|
254
|
+
event.tag(MULTILINE_TAG)
|
255
|
+
# this line is part of a multiline event, the next
|
256
|
+
# line will be part, too, put it into pending.
|
257
|
+
if pending
|
258
|
+
pending.append(event)
|
259
|
+
else
|
260
|
+
@pending[key] = event
|
261
|
+
end
|
262
|
+
event.cancel
|
263
|
+
else
|
264
|
+
# if we have something in pending, join it with this message
|
265
|
+
# and send it. otherwise, this is a new message and not part of
|
266
|
+
# multiline, send it.
|
267
|
+
if pending
|
268
|
+
pending.append(event)
|
269
|
+
event.overwrite(pending)
|
270
|
+
@pending.delete(key)
|
271
|
+
end
|
272
|
+
end # if match
|
273
|
+
end
|
274
|
+
|
275
|
+
def collapse_event!(event)
|
276
|
+
event["message"] = event["message"].join("\n") if event["message"].is_a?(Array)
|
277
|
+
event.timestamp = event.timestamp.first if event.timestamp.is_a?(Array)
|
278
|
+
event
|
279
|
+
end
|
280
|
+
end # class LogStash::Filters::Multiline
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Gem specification for the logstash multiline filter plugin.
Gem::Specification.new do |s|

  s.name            = 'logstash-filter-multiline'
  s.version         = '0.1.0'
  s.licenses        = ['Apache License (2.0)']
  s.summary         = "This filter will collapse multiline messages from a single source into one Logstash event."
  s.description     = "This filter will collapse multiline messages from a single source into one Logstash event."
  s.authors         = ["Elasticsearch"]
  s.email           = 'richard.pijnenburg@elasticsearch.com'
  s.homepage        = "http://logstash.net/"
  s.require_paths = ["lib"]

  # Files
  # Split on newlines explicitly: `$\` (the output record separator) is nil by
  # default, which makes String#split fall back to splitting on any
  # whitespace and breaks file names that contain spaces.
  s.files = `git ls-files`.split("\n")

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "group" => "filter" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
  s.add_runtime_dependency 'logstash-patterns-core'
  s.add_runtime_dependency 'logstash-filter-mutate'
  s.add_runtime_dependency 'jls-grok', '~> 0.11.0'

end
|
29
|
+
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require "gem_publisher"
|
2
|
+
|
3
|
+
desc "Publish gem to RubyGems.org"
|
4
|
+
task :publish_gem do |t|
|
5
|
+
gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first
|
6
|
+
gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
|
7
|
+
puts "Published #{gem}" if gem
|
8
|
+
end
|
9
|
+
|
data/rakelib/vendor.rake
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
require "net/http"
|
2
|
+
require "uri"
|
3
|
+
require "digest/sha1"
|
4
|
+
|
5
|
+
def vendor(*args)
|
6
|
+
return File.join("vendor", *args)
|
7
|
+
end
|
8
|
+
|
9
|
+
directory "vendor/" => ["vendor"] do |task, args|
|
10
|
+
mkdir task.name
|
11
|
+
end
|
12
|
+
|
13
|
+
def fetch(url, sha1, output)
|
14
|
+
|
15
|
+
puts "Downloading #{url}"
|
16
|
+
actual_sha1 = download(url, output)
|
17
|
+
|
18
|
+
if actual_sha1 != sha1
|
19
|
+
fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
|
20
|
+
end
|
21
|
+
end # def fetch
|
22
|
+
|
23
|
+
def file_fetch(url, sha1)
|
24
|
+
filename = File.basename( URI(url).path )
|
25
|
+
output = "vendor/#{filename}"
|
26
|
+
task output => [ "vendor/" ] do
|
27
|
+
begin
|
28
|
+
actual_sha1 = file_sha1(output)
|
29
|
+
if actual_sha1 != sha1
|
30
|
+
fetch(url, sha1, output)
|
31
|
+
end
|
32
|
+
rescue Errno::ENOENT
|
33
|
+
fetch(url, sha1, output)
|
34
|
+
end
|
35
|
+
end.invoke
|
36
|
+
|
37
|
+
return output
|
38
|
+
end
|
39
|
+
|
40
|
+
# Compute the SHA1 digest of a file's contents.
#
# @param path [String] path of the file to hash
# @return [String] hexadecimal SHA1 digest of the file's bytes
# @raise [Errno::ENOENT] when the file does not exist (file_fetch rescues
#   this to trigger a download)
def file_sha1(path)
  # Digest::SHA1.file streams the file in binary mode and closes it itself,
  # replacing the hand-rolled sysread/EOFError loop on a text-mode handle.
  Digest::SHA1.file(path).hexdigest
end
|
54
|
+
|
55
|
+
# Download +url+ to +output+ and return the SHA1 of the bytes received.
#
# The body is streamed into "#{output}.tmp" and renamed to +output+ only after
# the transfer finished, so an interrupted download never leaves a partial
# file at +output+. Redirects are not followed.
#
# @param url [String] http:// or https:// URL to fetch
# @param output [String] destination path
# @return [String] hexadecimal SHA1 digest of the downloaded body
# @raise [RuntimeError] when the server does not answer with a 2xx status
# @raise [SocketError] re-raised after logging when the connection fails
def download(url, output)
  uri = URI(url)
  digest = Digest::SHA1.new
  tmp = "#{output}.tmp"
  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string (and yields "/" for an empty path),
    # which uri.path alone would lose.
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # Fail on anything that is not a success. The previous check compared
      # Integers against response.code (a String) and was inverted, so it
      # could never fire and error pages were saved as the download.
      unless response.is_a?(Net::HTTPSuccess)
        fail "HTTP fetch failed for #{url}. #{response}"
      end
      # 0.0 when the header is absent, which disables the progress display.
      size = response["content-length"].to_i.to_f
      count = 0
      # Binary mode: the payload is an archive, not text.
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if size > 0 && $stdout.tty?
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
          end
        end
      end
      # Blank out the progress text; wide enough to cover "100.00%".
      $stdout.write("\r#{' ' * 8}\r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  return digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  # Only present when the transfer failed before the rename.
  File.unlink(tmp) if File.exist?(tmp)
end # def download
|
88
|
+
|
89
|
+
# Extract entries of a gzipped tarball, then delete the tarball.
#
# For every archive entry the block is called with the entry and must return
# the destination path, or nil to skip that entry. Regular files whose size
# and permission bits already match the entry are left untouched.
#
# @param tarball [String] path to the .tar.gz file
# @yieldparam entry the current archive entry
# @yieldreturn [String, nil] destination path, or nil to skip the entry
def untar(tarball, &block)
  require "archive/tar/minitar"
  # Binary mode: the archive is not text.
  tgz = Zlib::GzipReader.new(File.open(tarball, "rb"))
  tar = Archive::Tar::Minitar::Input.open(tgz)
  begin
    tar.each do |entry|
      path = block.call(entry)
      next if path.nil?
      parent = File.dirname(path)

      mkdir_p parent unless File.directory?(parent)

      if entry.directory?
        mkdir path unless File.directory?(path)
      else
        entry_mode = entry.instance_eval { @mode } & 0777
        # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
        if File.exist?(path)
          stat = File.stat(path)
          # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
          # expose headers in the entry.
          entry_size = entry.instance_eval { @size }
          # Skip writing if the existing file has the same size and mode.
          next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
        end
        puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
        File.open(path, "wb") do |fd|
          # eof? check lets us skip empty files. Necessary because the API provided by
          # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
          # IO object. Something about empty files in this EntryStream causes
          # IO.copy_stream to throw "can't convert nil into String" on JRuby
          # TODO(sissel): File a bug about this.
          while !entry.eof?
            chunk = entry.read(16384)
            fd.write(chunk)
          end
          #IO.copy_stream(entry, fd)
        end
        File.chmod(entry_mode, path)
      end
    end
  ensure
    # Close the archive even when extraction fails part-way.
    tar.close
  end
  # Reached only after a successful extraction, so a failed run keeps the tarball.
  File.unlink(tarball) if File.file?(tarball)
end # def untar
|
133
|
+
|
134
|
+
# Decompress a gzip file next to itself and delete the compressed original.
#
# "vendor/foo.db.gz" is written to "vendor/foo.db". If decompression fails,
# the partial output is removed and the error is re-raised; the compressed
# file is kept in that case.
#
# @param file [String] path ending in ".gz"
# @raise [ArgumentError] when +file+ does not end in ".gz"
def ungz(file)
  require "zlib"

  # Strip only the trailing extension; gsub('.gz', '') also mangled names
  # that contain ".gz" somewhere else.
  outpath = file.sub(/\.gz\z/, '')
  # Without a distinct output path we would truncate the input while reading
  # it and then delete it.
  raise ArgumentError, "#{file} does not end in .gz" if outpath == file

  begin
    # Block forms close both handles on success and on failure.
    Zlib::GzipReader.open(file) do |tgz|
      File.open(outpath, "wb") do |out|
        IO::copy_stream(tgz, out)
      end
    end
    File.unlink(file)
  rescue
    File.unlink(outpath) if File.file?(outpath)
    raise
  end
end
|
149
|
+
|
150
|
+
desc "Process any vendor files required for this plugin"
|
151
|
+
# Fetch every entry of @files (hashes with 'url', 'sha1' and optionally
# 'files') into vendor/ and unpack .tar.gz / .gz downloads.
task "vendor" do |task, args|

  @files.each do |file|
    download = file_fetch(file['url'], file['sha1'])
    # Dots escaped and the match anchored, so only real extensions qualify.
    if download =~ /\.tar\.gz\z/
      prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
      untar(download) do |entry|
        # NOTE(review): without a 'files' filter the original passed nil to
        # File.join and raised TypeError. Extracting to the archive-relative
        # path is assumed to be the intent; confirm.
        out = entry.full_name
        if !file['files'].nil?
          # Only extract the listed files, flattened into vendor/.
          next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
          out = entry.full_name.split("/").last
        end
        File.join('vendor', out)
      end
    elsif download =~ /\.gz\z/
      ungz(download)
    end
  end

end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
require "logstash/filters/multiline"
|
5
|
+
|
6
|
+
describe LogStash::Filters::Multiline do
|
7
|
+
|
8
|
+
describe "simple multiline" do
|
9
|
+
config <<-CONFIG
|
10
|
+
filter {
|
11
|
+
multiline {
|
12
|
+
periodic_flush => false
|
13
|
+
pattern => "^\\s"
|
14
|
+
what => previous
|
15
|
+
}
|
16
|
+
}
|
17
|
+
CONFIG
|
18
|
+
|
19
|
+
sample [ "hello world", " second line", "another first line" ] do
|
20
|
+
expect(subject).to be_a(Array)
|
21
|
+
insist { subject.size } == 2
|
22
|
+
insist { subject[0]["message"] } == "hello world\n second line"
|
23
|
+
insist { subject[1]["message"] } == "another first line"
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
describe "multiline using grok patterns" do
|
28
|
+
config <<-CONFIG
|
29
|
+
filter {
|
30
|
+
multiline {
|
31
|
+
pattern => "^%{NUMBER} %{TIME}"
|
32
|
+
negate => true
|
33
|
+
what => previous
|
34
|
+
}
|
35
|
+
}
|
36
|
+
CONFIG
|
37
|
+
|
38
|
+
sample [ "120913 12:04:33 first line", "second line", "third line" ] do
|
39
|
+
insist { subject["message"] } == "120913 12:04:33 first line\nsecond line\nthird line"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
describe "multiline safety among multiple concurrent streams" do
|
44
|
+
config <<-CONFIG
|
45
|
+
filter {
|
46
|
+
multiline {
|
47
|
+
pattern => "^\\s"
|
48
|
+
what => previous
|
49
|
+
}
|
50
|
+
}
|
51
|
+
CONFIG
|
52
|
+
|
53
|
+
count = 50
|
54
|
+
stream_count = 3
|
55
|
+
|
56
|
+
# first make sure to have starting lines for all streams
|
57
|
+
eventstream = stream_count.times.map do |i|
|
58
|
+
stream = "stream#{i}"
|
59
|
+
lines = [LogStash::Event.new("message" => "hello world #{stream}", "host" => stream, "type" => stream)]
|
60
|
+
lines += rand(5).times.map do |n|
|
61
|
+
LogStash::Event.new("message" => " extra line in #{stream}", "host" => stream, "type" => stream)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# then add starting lines for random stream with sublines also for random stream
|
66
|
+
eventstream += (count - stream_count).times.map do |i|
|
67
|
+
stream = "stream#{rand(stream_count)}"
|
68
|
+
lines = [LogStash::Event.new("message" => "hello world #{stream}", "host" => stream, "type" => stream)]
|
69
|
+
lines += rand(5).times.map do |n|
|
70
|
+
stream = "stream#{rand(stream_count)}"
|
71
|
+
LogStash::Event.new("message" => " extra line in #{stream}", "host" => stream, "type" => stream)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
events = eventstream.flatten.map{|event| event.to_hash}
|
76
|
+
|
77
|
+
sample events do
|
78
|
+
expect(subject).to be_a(Array)
|
79
|
+
insist { subject.size } == count
|
80
|
+
|
81
|
+
subject.each_with_index do |event, i|
|
82
|
+
insist { event["type"] == event["host"] } == true
|
83
|
+
stream = event["type"]
|
84
|
+
insist { event["message"].split("\n").first } =~ /hello world /
|
85
|
+
insist { event["message"].scan(/stream\d/).all?{|word| word == stream} } == true
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
describe "multiline add/remove tags and fields only when matched" do
|
91
|
+
config <<-CONFIG
|
92
|
+
filter {
|
93
|
+
mutate {
|
94
|
+
add_tag => "dummy"
|
95
|
+
}
|
96
|
+
multiline {
|
97
|
+
add_tag => [ "nope" ]
|
98
|
+
remove_tag => "dummy"
|
99
|
+
add_field => [ "dummy2", "value" ]
|
100
|
+
pattern => "an unlikely match"
|
101
|
+
what => previous
|
102
|
+
}
|
103
|
+
}
|
104
|
+
CONFIG
|
105
|
+
|
106
|
+
sample [ "120913 12:04:33 first line", "120913 12:04:33 second line" ] do
|
107
|
+
expect(subject).to be_a(Array)
|
108
|
+
insist { subject.size } == 2
|
109
|
+
|
110
|
+
subject.each do |s|
|
111
|
+
insist { s["tags"].include?("nope") } == false
|
112
|
+
insist { s["tags"].include?("dummy") } == true
|
113
|
+
insist { s.include?("dummy2") } == false
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
describe "regression test for GH issue #1258" do
  # The heredoc is processed like a double-quoted string, so the backslash is
  # doubled to put the regexp `^\s` into the config. A bare "\s" would be
  # replaced by a literal space before the config is parsed.
  config <<-CONFIG
  filter {
    multiline {
      pattern => "^\\s"
      what => "next"
      add_tag => ["multi"]
    }
  }
  CONFIG

  sample [ " match", "nomatch" ] do
    expect(subject).to be_a(LogStash::Event)
    insist { subject["message"] } == " match\nnomatch"
  end
end
|
134
|
+
|
135
|
+
describe "multiple match/nomatch" do
  # The heredoc is processed like a double-quoted string, so the backslash is
  # doubled to put the regexp `^\s` into the config. A bare "\s" would be
  # replaced by a literal space before the config is parsed.
  config <<-CONFIG
  filter {
    multiline {
      pattern => "^\\s"
      what => "next"
      add_tag => ["multi"]
    }
  }
  CONFIG

  sample [" match1", "nomatch1", " match2", "nomatch2"] do
    expect(subject).to be_a(Array)
    insist { subject.size } == 2
    insist { subject[0]["message"] } == " match1\nnomatch1"
    insist { subject[1]["message"] } == " match2\nnomatch2"
  end
end
|
153
|
+
end
|
metadata
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: logstash-filter-multiline
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Elasticsearch
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-11-10 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: logstash
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ! '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.4.0
|
20
|
+
- - <
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.0.0
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.4.0
|
30
|
+
- - <
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.0.0
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: logstash-patterns-core
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
type: :runtime
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0'
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: logstash-filter-mutate
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ! '>='
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: jls-grok
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ~>
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: 0.11.0
|
68
|
+
type: :runtime
|
69
|
+
prerelease: false
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ~>
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: 0.11.0
|
75
|
+
description: This filter will collapse multiline messages from a single source into
|
76
|
+
one Logstash event.
|
77
|
+
email: richard.pijnenburg@elasticsearch.com
|
78
|
+
executables: []
|
79
|
+
extensions: []
|
80
|
+
extra_rdoc_files: []
|
81
|
+
files:
|
82
|
+
- .gitignore
|
83
|
+
- Gemfile
|
84
|
+
- LICENSE
|
85
|
+
- Rakefile
|
86
|
+
- lib/logstash/filters/multiline.rb
|
87
|
+
- logstash-filter-multiline.gemspec
|
88
|
+
- rakelib/publish.rake
|
89
|
+
- rakelib/vendor.rake
|
90
|
+
- spec/filters/multiline_spec.rb
|
91
|
+
homepage: http://logstash.net/
|
92
|
+
licenses:
|
93
|
+
- Apache License (2.0)
|
94
|
+
metadata:
|
95
|
+
logstash_plugin: 'true'
|
96
|
+
group: filter
|
97
|
+
post_install_message:
|
98
|
+
rdoc_options: []
|
99
|
+
require_paths:
|
100
|
+
- lib
|
101
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
102
|
+
requirements:
|
103
|
+
- - ! '>='
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: '0'
|
106
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ! '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
requirements: []
|
112
|
+
rubyforge_project:
|
113
|
+
rubygems_version: 2.4.1
|
114
|
+
signing_key:
|
115
|
+
specification_version: 4
|
116
|
+
summary: This filter will collapse multiline messages from a single source into one
|
117
|
+
Logstash event.
|
118
|
+
test_files:
|
119
|
+
- spec/filters/multiline_spec.rb
|