logstash-codec-joinlines 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/CONTRIBUTORS +19 -0
- data/DEVELOPER.md +3 -0
- data/Gemfile +2 -0
- data/LICENSE +11 -0
- data/README.md +86 -0
- data/lib/logstash/codecs/auto_flush.rb +48 -0
- data/lib/logstash/codecs/identity_map_codec.rb +347 -0
- data/lib/logstash/codecs/joinlines.rb +301 -0
- data/lib/logstash/codecs/retriggerable_task.rb +81 -0
- data/logstash-codec-joinlines.gemspec +28 -0
- data/spec/codecs/joinlines_spec.rb +435 -0
- data/spec/spec_helper.rb +140 -0
- metadata +132 -0
@@ -0,0 +1,301 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/codecs/base"
|
3
|
+
require "logstash/util/charset"
|
4
|
+
require "logstash/timestamp"
|
5
|
+
require "logstash/codecs/auto_flush"
|
6
|
+
|
7
|
+
# The joinlines codec will join lines matching specified patterns.
# It is based on the multiline codec, but offers the opportunity to
# specify a list of patterns, whats and negates. The lists must be
# of equal length.
#
# IMPORTANT: If you are using a Logstash input plugin that supports multiple
# hosts, such as the <<plugins-inputs-beats>> input plugin, you should not use
# the joinlines codec to handle multiline events. Doing so may result in the
# mixing of streams and corrupted event data. In this situation, you need to
# handle multiline events before sending the event data to Logstash.
#
# Example usage
# [source,ruby]
# input {
#   stdin {
#     codec => joinlines {
#       patterns => [ "^The following message", "^\s*at" ]
#       what => [ "next", "previous" ]
#       negate => [ false, false ]
#     }
#   }
# }
#
# The example above will join lines starting with "The following message"
# with the next line, and stack traces with the previous line.
#
module LogStash module Codecs class Joinlines < LogStash::Codecs::Base

  # The codec name
  config_name "joinlines"

  # The patterns to recognize
  config :patterns, :validate => :string, :list => true, :required => true

  # What to do with a line matching the corresponding pattern: join it
  # with the "previous" line or with the "next" line.
  config :what, :validate => ["previous", "next"], :list => true, :required => true

  # Negate match?
  config :negate, :validate => :boolean, :list => true, :required => true

  # Logstash ships by default with a bunch of patterns, so you don't
  # necessarily need to define this yourself unless you are adding additional
  # patterns.
  #
  # Pattern files are plain text with format:
  # [source,ruby]
  #     NAME PATTERN
  #
  # For example:
  # [source,ruby]
  #     NUMBER \d+
  config :patterns_dir, :validate => :array, :default => []

  # The character encoding used in this input. Examples include `UTF-8`
  # and `cp1252`
  #
  # This setting is useful if your log files are in `Latin-1` (aka `cp1252`)
  # or in another character set other than `UTF-8`.
  #
  # This only affects "plain" format logs since JSON is `UTF-8` already.
  config :charset, :validate => ::Encoding.name_list, :default => "UTF-8"

  # Tag multiline events with a given tag. This tag will only be added
  # to events that actually have multiple lines in them.
  config :multiline_tag, :validate => :string, :default => "joinlines"

  # The accumulation of events can make logstash exit with an out of memory error
  # if event boundaries are not correctly defined. This settings make sure to flush
  # multiline events after reaching a number of lines, it is used in combination
  # max_bytes.
  config :max_lines, :validate => :number, :default => 500

  # The accumulation of events can make logstash exit with an out of memory error
  # if event boundaries are not correctly defined. This settings make sure to flush
  # multiline events after reaching a number of bytes, it is used in combination
  # max_lines.
  config :max_bytes, :validate => :bytes, :default => "10 MiB"

  # The accumulation of multiple lines will be converted to an event when either a
  # matching new line is seen or there has been no new data appended for this many
  # seconds. No default. If unset, no auto_flush. Units: seconds
  config :auto_flush_interval, :validate => :number

  public
  # Compiles one grok per configured pattern, resolves each pattern's
  # join handler (do_previous / do_next), and prepares the line buffer,
  # charset converter and (optional) auto-flush timer.
  #
  # Raises LogStash::ConfigurationError when the per-pattern lists are
  # not the same length, as the documentation above requires.
  def register
    require "grok-pure" # rubygem 'jls-grok'
    require 'logstash/patterns/core'

    # 'patterns', 'what' and 'negate' must pair up element-for-element;
    # failing early here gives a clear error instead of a confusing
    # NameError from the "do_#{what}" handler lookup below when the
    # zipped 'what' is nil.
    unless @patterns.size == @what.size && @patterns.size == @negate.size
      raise LogStash::ConfigurationError, "joinlines: 'patterns', 'what' and 'negate' must have the same number of elements"
    end

    # Tracks the "what" of the most recent matching line ("", "previous" or "next").
    @matching = ""

    # Detect if we are running from a jarfile, pick the right path.
    patterns_path = []
    patterns_path += [LogStash::Patterns::Core.path]

    @patterns_dir = patterns_path.to_a + @patterns_dir
    @groks = []
    @handlers = []

    @patterns.zip(@what).each do |pattern,what|
      grok = Grok.new

      @patterns_dir.each do |path|
        if ::File.directory?(path)
          path = ::File.join(path, "*")
        end

        Dir.glob(path).each do |file|
          @logger.debug("Grok loading patterns from file", :path => file)
          grok.add_patterns_from_file(file)
        end
      end

      grok.compile(pattern)
      handler = method("do_#{what}".to_sym)

      @groks.push(grok)
      @handlers.push(handler)
    end

    @logger.trace("Registered joinlines plugin", :type => @type, :config => @config)
    reset_buffer

    @converter = LogStash::Util::Charset.new(@charset)
    @converter.logger = @logger

    if @auto_flush_interval
      # will start on first decode
      @auto_flush_runner = AutoFlush.new(self, @auto_flush_interval)
    end
  end # def register

  # Called when an identity map codec takes over auto-flush duty:
  # replaces the local runner with a no-op placeholder.
  def use_mapper_auto_flush
    return unless auto_flush_active?
    @auto_flush_runner = AutoFlushUnset.new(nil, nil)
    @auto_flush_interval = @auto_flush_interval.to_f
  end

  # Entry point used by inputs that pass a listener carrying upstream
  # state; routes each produced event to the listener that accumulated
  # its first line (previous vs. last seen).
  def accept(listener)
    # memoize references to listener that holds upstream state
    @previous_listener = @last_seen_listener || listener
    @last_seen_listener = listener

    internal_decode(listener.data) do |event,what|
      what_based_listener(what).process_event(event)
    end
  end

  # Tuples of (pattern, what, negate, grok, handler), one per configured pattern.
  def zip_config
    @patterns.zip(@what, @negate, @groks, @handlers)
  end

  #private
  # Core joining loop. Splits +text+ into lines, matches each line against
  # every configured grok (first match wins), flushes the buffered event
  # when the current line starts a new logical event, then buffers the line.
  # Yields (event, what) pairs for each flushed event.
  def internal_decode(text, &block)
    do_flush = false
    text = @converter.convert(text)
    text.split("\n").each do |line|
      matched = false
      zip_config.each do |pattern,what,negate,grok,handler|
        match = grok.match(line)
        @logger.debug("Joinlines", :pattern => pattern, :text => line,
                      :match => (match != false), :negate => negate)

        # Add negate option
        match = (match and !negate) || (!match and negate)

        if match
          # A "next"-joining line starts a new event unless we were
          # already accumulating a "next" run.
          do_flush = (what == "next" and @matching != "next")
          matched = true
          @matching = what
          break
        end
      end

      if !matched
        # A non-matching line terminates a "previous" run but completes
        # a "next" run (it is owned by the buffered lines).
        do_flush = (@matching != "next")
        @matching = ""
      end

      if do_flush
        flush do |event|
          yield(event,@matching)
        end
        do_flush = false
      end

      auto_flush_runner.start
      buffer(line)
    end
  end

  public
  # Standard codec API: decode +text+ and yield completed events.
  # Note: the last accumulated event stays buffered until #flush or
  # auto-flush releases it.
  def decode(text, &block)
    internal_decode(text) do |event,what|
      yield(event)
    end
  end # def decode

  # Appends a line to the pending event buffer and tracks its byte size.
  def buffer(text)
    @buffer_bytes += text.bytesize
    @buffer.push(text)
  end

  # Yields the buffered lines as a single merged event, then clears the
  # buffer. If the block raises, the buffer is kept so the data can be
  # retried on the next flush.
  def flush(&block)
    if block_given? && @buffer.any?
      no_error = true
      events = merge_events
      begin
        yield events
      rescue ::Exception => e
        # need to rescue everything
        # likeliest cause: backpressure or timeout by exception
        # can't really do anything but leave the data in the buffer for next time if there is one
        @logger.error("Joinlines: flush downstream error", :exception => e)
        no_error = false
      end
      reset_buffer if no_error
    end
  end

  # Timer callback: flush the pending event to the last listener seen.
  def auto_flush(listener = @last_seen_listener)
    return if listener.nil?

    flush do |event|
      listener.process_event(event)
    end
  end

  # Builds one event from the buffered lines, tagging it when multiple
  # lines were joined or a size limit was exceeded.
  # NOTE(review): NL is not defined in this file — presumably provided by
  # a required file or the base class; confirm before relying on it.
  def merge_events
    event = LogStash::Event.new(LogStash::Event::TIMESTAMP => @time, "message" => @buffer.join(NL))
    event.tag @multiline_tag if !@multiline_tag.empty? && @buffer.size > 1
    event.tag "joinlines_codec_max_bytes_reached" if over_maximum_bytes?
    event.tag "joinlines_codec_max_lines_reached" if over_maximum_lines?
    event
  end

  # Empties the pending-line buffer and its byte counter.
  def reset_buffer
    @buffer = []
    @buffer_bytes = 0
  end

  # True for any join mode except "next".
  def doing_previous?(what)
    what != "next"
  end

  # "previous"-joined events belong to the listener that produced the
  # first buffered line; "next"-joined events to the latest one.
  def what_based_listener(what)
    doing_previous?(what) ? @previous_listener : @last_seen_listener
  end

  # Handler for what => "next": buffer first, flush when the run ends
  # or the buffer limits are exceeded.
  def do_next(text, matched, &block)
    buffer(text)
    auto_flush_runner.start
    flush(&block) if !matched || buffer_over_limits?
  end

  # Handler for what => "previous": flush the previous event first,
  # then buffer the current line.
  def do_previous(text, matched, &block)
    flush(&block) if !matched || buffer_over_limits?
    auto_flush_runner.start
    buffer(text)
  end

  def over_maximum_lines?
    @buffer.size > @max_lines
  end

  def over_maximum_bytes?
    @buffer_bytes >= @max_bytes
  end

  def buffer_over_limits?
    over_maximum_lines? || over_maximum_bytes?
  end

  # Encoding is a pass-through for this codec.
  def encode(event)
    # Nothing to do.
    @on_event.call(event, event)
  end # def encode

  def close
    auto_flush_runner.stop
  end

  def auto_flush_active?
    !@auto_flush_interval.nil?
  end

  # The active timer, or an inert placeholder when auto-flush is unset.
  def auto_flush_runner
    @auto_flush_runner || AutoFlushUnset.new(nil, nil)
  end

  # clone/dup must re-register so the copy gets its own groks and buffer.
  def initialize_copy(source)
    super
    register
  end

end end end # class LogStash::Codecs::Joinlines
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require "concurrent"
|
2
|
+
|
3
|
+
module LogStash module Codecs class RetriggerableTask
  # Polling granularity in seconds; a delay is counted down in multiples
  # of this interval. (Numeric literals are already frozen, so no
  # explicit freeze is needed.)
  SLEEP_FOR = 0.25

  attr_reader :thread

  # delay    - seconds of inactivity before the listener is notified
  # listener - object responding to #timeout
  def initialize(delay, listener)
    @count = calculate_count(delay)
    @listener = listener
    @counter = Concurrent::AtomicFixnum.new(@count)
    @stopped = Concurrent::AtomicBoolean.new(false)
    @semaphore = Concurrent::Semaphore.new(1)
  end

  # (Re)arm the timer. If the countdown thread is currently executing the
  # timeout callback, wait (via the semaphore) until it finishes; then
  # either reset the running countdown or start a new one.
  def retrigger
    return if stopped?
    @semaphore.acquire if executing?

    if pending?
      reset_counter
    else
      start
    end
  end

  # Permanently stop the task; the countdown thread exits without
  # invoking the listener.
  def close
    @stopped.make_true
  end

  # Remaining countdown ticks.
  def counter
    @counter.value
  end

  # True while the countdown has elapsed and the timeout callback may be running.
  def executing?
    running? && counter < 1
  end

  # True while the countdown thread is alive and still counting down.
  def pending?
    running? && counter > 0
  end

  private

  # Convert a delay in seconds to a tick count.
  def calculate_count(value)
    # in multiples of SLEEP_FOR (0.25) seconds
    # if delay is 10 seconds then count is 40
    # this only works when SLEEP_FOR is less than 1
    return 1 if value < SLEEP_FOR
    (value / SLEEP_FOR).floor
  end

  def reset_counter
    @counter.value = @count
  end

  def running?
    @thread && @thread.alive?
  end

  # Spawn the countdown thread: sleep-and-decrement until the counter
  # reaches zero (or the task is stopped), then fire the timeout while
  # holding the semaphore so #retrigger can synchronize with it.
  def start
    reset_counter
    @thread = Thread.new do
      while counter > 0
        break if stopped?
        sleep SLEEP_FOR
        @counter.decrement
      end

      @semaphore.drain_permits
      @listener.timeout unless stopped?
      @semaphore.release
    end
  end

  def stopped?
    @stopped.value
  end
end end end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
Gem::Specification.new do |spec|
  # Identity
  spec.name          = 'logstash-codec-joinlines'
  spec.version       = '0.1.0'
  spec.licenses      = ['Apache-2.0']
  spec.summary       = 'Merges multiline messages into a single event, allowing for multiple patterns.'
  spec.description   = 'This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program'
  spec.homepage      = 'https://github.com/lovmoen/logstash-codec-joinlines'
  spec.authors       = ['Svein L. Ellingsen (lovmoen)']
  spec.email         = 'lovmoen@gmail.com'
  spec.require_paths = ['lib']

  # Packaged files and the subset that constitutes the test suite
  spec.files = Dir['lib/**/*', 'spec/**/*', 'vendor/**/*', '*.gemspec', '*.md',
                   'CONTRIBUTORS', 'Gemfile', 'LICENSE', 'NOTICE.TXT']
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})

  # Special flag so Logstash recognizes this gem as a codec plugin
  spec.metadata = { 'logstash_plugin' => 'true', 'logstash_group' => 'codec' }

  # Runtime dependencies
  spec.add_runtime_dependency 'logstash-core-plugin-api', '~> 2.0'
  spec.add_runtime_dependency 'logstash-codec-line'
  spec.add_runtime_dependency 'logstash-patterns-core'
  spec.add_runtime_dependency 'jls-grok', '~> 0.11.1'

  # Development dependencies
  spec.add_development_dependency 'logstash-devutils'
end
|
@@ -0,0 +1,435 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/devutils/rspec/spec_helper"
|
3
|
+
require "logstash/codecs/joinlines"
|
4
|
+
require "logstash/event"
|
5
|
+
require "insist"
|
6
|
+
require_relative '../spec_helper'
|
7
|
+
|
8
|
+
# above helper also defines a subclass of Joinlines
|
9
|
+
# called JoinlinesRspec that exposes the internal buffer
|
10
|
+
# and a Logger Mock
|
11
|
+
|
12
|
+
describe LogStash::Codecs::Joinlines do
  # Exercises the list-valued configuration: several (pattern, what, negate)
  # triples active at once.
  context "#multipatterns" do
    let(:config) { {"patterns" => "", "what" => "next", "negate" => false} }
    let(:codec) { LogStash::Codecs::Joinlines.new(config).tap {|c| c.register } }
    let(:events) { [] }
    let(:line_producer) do
      lambda do |lines|
        lines.each do |line|
          codec.decode(line) do |event|
            events << event
          end
        end
      end
    end

    it "should internally decode lines to (event, what) pairs" do
      config.update("patterns" => ["^\\s", "next"], "what" => ["previous", "next"], "negate" => [false, false])
      text = "hello world\n second line\nanother first line\nnext\nowns previous\n"

      events = []
      whats = []
      codec.internal_decode(text) do |event,what|
        events.push(event)
        whats.push(what)
      end

      # Must flush to get last event
      codec.flush do |event|
        events.push(event)
        whats.push("final") # dummy
      end

      expect(events.size).to eq(3)
      expect(whats.size).to eq(3)
      expect(events[0].get("message")).to eq("hello world\n second line")
      expect(events[1].get("message")).to eq("another first line")
      expect(events[2].get("message")).to eq("next\nowns previous")
    end

    it "should break between consecutive previous and next" do
      config.update("patterns" => ["^\\s", "next"], "what" => ["previous", "next"], "negate" => [false, false])
      lines = [ "hello world", " second line", "next", "owns previous" ]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq(2)
      expect(events[0].get("message")).to eq "hello world\n second line"
      expect(events[0].get("tags")).to include("joinlines")
      expect(events[1].get("message")).to eq "next\nowns previous"
      expect(events[1].get("tags")).to include("joinlines")
    end

    it "should stitch together consecutive next and previous" do
      config.update("patterns" => ["^\\s", "next"], "what" => ["previous", "next"], "negate" => [false, false])
      lines = [ "next", "owns previous and next", " second line", "another first" ]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq(2)
      expect(events[0].get("message")).to eq "next\nowns previous and next\n second line"
      expect(events[0].get("tags")).to include("joinlines")
      expect(events[1].get("message")).to eq "another first"
      expect(events[1].get("tags")).to be_nil
    end
  end

  # Single-pattern behavior through the public #decode API (multiline-codec
  # compatible scenarios).
  context "#decode" do
    let(:config) { {"patterns" => "", "what" => "next", "negate" => false} }
    let(:codec) { LogStash::Codecs::Joinlines.new(config).tap {|c| c.register } }
    let(:events) { [] }
    let(:line_producer) do
      lambda do |lines|
        lines.each do |line|
          codec.decode(line) do |event|
            events << event
          end
        end
      end
    end

    it "should be able to handle multiline events with additional lines space-indented" do
      config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
      lines = [ "hello world", " second line", "another first line" ]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq(2)
      expect(events[0].get("message")).to eq "hello world\n second line"
      expect(events[0].get("tags")).to include("joinlines")
      expect(events[1].get("message")).to eq "another first line"
      expect(events[1].get("tags")).to be_nil
    end

    it "should allow custom tag added to multiline events" do
      config.update("patterns" => "^\\s", "what" => "previous", "negate" => false, "multiline_tag" => "hurray")
      lines = [ "hello world", " second line", "another first line" ]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq 2
      expect(events[0].get("tags")).to include("hurray")
      expect(events[1].get("tags")).to be_nil
    end

    it "should handle new lines in messages" do
      config.update("patterns" => '\D', "what" => "previous", "negate" => false)
      lineio = StringIO.new("1234567890\nA234567890\nB234567890\n0987654321\n")
      until lineio.eof
        line = lineio.read(256) #when this is set to 36 the tests fail
        codec.decode(line) {|evt| events.push(evt)}
      end
      codec.flush { |e| events << e }
      expect(events[0].get("message")).to eq "1234567890\nA234567890\nB234567890"
      expect(events[1].get("message")).to eq "0987654321"
    end

    it "should allow grok patterns to be used" do
      config.update(
        "patterns" => "^%{NUMBER} %{TIME}",
        "negate" => true,
        "what" => "previous"
      )

      lines = [ "120913 12:04:33 first line", "second line", "third line" ]

      line_producer.call(lines)
      codec.flush { |e| events << e }

      insist { events.size } == 1
      insist { events.first.get("message") } == lines.join("\n")
    end

    context "using default UTF-8 charset" do

      it "should decode valid UTF-8 input" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
        lines = [ "foobar", "κόσμε" ]
        lines.each do |line|
          expect(line.encoding.name).to eq "UTF-8"
          expect(line.valid_encoding?).to be_truthy
          codec.decode(line) { |event| events << event }
        end

        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(lines).each do |tuple|
          expect(tuple[0].get("message")).to eq tuple[1]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end

      it "should escape invalid sequences" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
        lines = [ "foo \xED\xB9\x81\xC3", "bar \xAD" ]
        lines.each do |line|
          expect(line.encoding.name).to eq "UTF-8"
          expect(line.valid_encoding?).to eq false

          codec.decode(line) { |event| events << event }
        end
        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(lines).each do |tuple|
          # invalid sequences are escaped via String#inspect (minus the quotes)
          expect(tuple[0].get("message")).to eq tuple[1].inspect[1..-2]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end

      it "decodes and joins multiple patterns" do
        config.update("patterns" => [ "^\\s", "^the following" ], "what" => [ "previous", "next" ], "negate" => [ false, false] )
        lines = [ "hello world", " second line", "another first line", "the following message belongs to next", "I own the previous", "Another first" ]

        lines.each do |line|
          codec.decode(line) do |event|
            events << event
          end
        end

        codec.flush { |e| events << e }

        #expect(events.size).to eq(4)
        expect(events[0].get("message")).to eq "hello world\n second line"
        expect(events[0].get("tags")).to include("joinlines")
        expect(events[1].get("message")).to eq "another first line"
        expect(events[1].get("tags")).to be_nil
        expect(events[2].get("message")).to eq "the following message belongs to next\nI own the previous"
        expect(events[2].get("tags")).to include("joinlines")
        expect(events[3].get("message")).to eq "Another first"
        expect(events[3].get("tags")).to be_nil
      end
    end


    context "with valid non UTF-8 source encoding" do

      it "should encode to UTF-8" do
        config.update("charset" => "ISO-8859-1", "patterns" => "^\\s", "what" => "previous", "negate" => false)
        samples = [
          ["foobar", "foobar"],
          ["\xE0 Montr\xE9al", "à Montréal"],
        ]

        # lines = [ "foo \xED\xB9\x81\xC3", "bar \xAD" ]
        samples.map{|(a, b)| a.force_encoding("ISO-8859-1")}.each do |line|
          expect(line.encoding.name).to eq "ISO-8859-1"
          expect(line.valid_encoding?).to eq true

          codec.decode(line) { |event| events << event }
        end
        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(samples.map{|(a, b)| b}).each do |tuple|
          expect(tuple[1].encoding.name).to eq "UTF-8"
          expect(tuple[0].get("message")).to eq tuple[1]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end
    end

    context "with invalid non UTF-8 source encoding" do

      it "should encode to UTF-8" do
        config.update("charset" => "ASCII-8BIT", "patterns" => "^\\s", "what" => "previous", "negate" => false)
        samples = [
          ["\xE0 Montr\xE9al", "� Montr�al"],
          ["\xCE\xBA\xCF\x8C\xCF\x83\xCE\xBC\xCE\xB5", "����������"],
        ]
        events = []
        samples.map{|(a, b)| a.force_encoding("ASCII-8BIT")}.each do |line|
          expect(line.encoding.name).to eq "ASCII-8BIT"
          expect(line.valid_encoding?).to eq true

          codec.decode(line) { |event| events << event }
        end
        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(samples.map{|(a, b)| b}).each do |tuple|
          expect(tuple[1].encoding.name).to eq "UTF-8"
          expect(tuple[0].get("message")).to eq tuple[1]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end

    end
  end

  # Buffer-limit behavior. NOTE(review): `decode_events` and `options` are
  # provided by spec_helper — confirm there for the exact event generation.
  context "with non closed multiline events" do
    let(:random_number_of_events) { rand(300..1000) }
    let(:sample_event) { "- Sample event" }
    let(:events) { decode_events }
    let(:unmerged_events_count) { events.collect { |event| event.get("message").split(LogStash::Codecs::Joinlines::NL).size }.inject(&:+) }

    context "break on maximum_lines" do
      let(:max_lines) { rand(10..100) }
      let(:options) {
        {
          "patterns" => "^-",
          "what" => "previous",
          "negate" => false,
          "max_lines" => max_lines,
          "max_bytes" => "2 mb"
        }
      }

      it "flushes on a maximum lines" do
        expect(unmerged_events_count).to eq(random_number_of_events)
      end

      it "tags the event" do
        expect(events.first.get("tags")).to include("joinlines_codec_max_lines_reached")
      end
    end

    context "break on maximum bytes" do
      let(:max_bytes) { rand(30..100) }
      let(:options) {
        {
          "patterns" => "^-",
          "what" => "previous",
          "negate" => false,
          "max_lines" => 20000,
          "max_bytes" => max_bytes
        }
      }

      it "flushes on a maximum bytes size" do
        expect(unmerged_events_count).to eq(random_number_of_events)
      end

      it "tags the event" do
        expect(events.first.get("tags")).to include("joinlines_codec_max_bytes_reached")
      end
    end
  end

  # Timing-based auto-flush behavior; uses Jlc helpers from spec_helper
  # (listener classes, JoinlinesRspec subclass, log tracer, custom matchers).
  describe "auto flushing" do
    let(:config) { {"patterns" => "", "what" => "next", "negate" => false} }
    let(:events) { [] }
    let(:lines) do
      { "en.log" => ["hello world", " second line", " third line"],
        "fr.log" => ["Salut le Monde", " deuxième ligne", " troisième ligne"],
        "de.log" => ["Hallo Welt"] }
    end
    let(:listener_class) { Jlc::LineListener }
    let(:auto_flush_interval) { 2 }

    let(:line_producer) do
      lambda do |path|
        #create a listener that holds upstream state
        listener = listener_class.new(events, codec, path)
        lines[path].each do |data|
          listener.accept(data)
        end
      end
    end

    let(:codec) do
      Jlc::JoinlinesRspec.new(config).tap {|c| c.register}
    end

    before :each do
      expect(LogStash::Codecs::Joinlines).to receive(:logger).and_return(Jlc::JoinlinesLogTracer.new).at_least(:once)
    end

    context "when auto_flush_interval is not set" do
      it "does not build any events" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
        line_producer.call("en.log")
        sleep auto_flush_interval + 0.1
        expect(events.size).to eq(0)
        expect(codec.buffer_size).to eq(3)
      end
    end

    context "when the auto_flush raises an exception" do
      let(:errmsg) { "OMG, Daleks!" }
      let(:listener_class) { Jlc::LineErrorListener }

      it "does not build any events, logs an error and the buffer data remains" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false,
                      "auto_flush_interval" => auto_flush_interval)
        line_producer.call("en.log")
        sleep(auto_flush_interval + 0.2)
        msg, args = codec.logger.trace_for(:error)
        expect(msg).to eq("Joinlines: flush downstream error")
        expect(args[:exception].message).to eq(errmsg)
        expect(events.size).to eq(0)
        expect(codec.buffer_size).to eq(3)
      end
    end

    # Feed the lines of +key+, wait +sleeping+ seconds, then run the
    # caller's expectations.
    def assert_produced_events(key, sleeping)
      line_producer.call(key)
      sleep(sleeping)
      yield
      #expect(codec).to have_an_empty_buffer
    end

    context "mode: previous, when there are pauses between multiline file writes" do
      it "auto-flushes events from the accumulated lines to the queue" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false,
                      "auto_flush_interval" => auto_flush_interval)

        assert_produced_events("en.log", auto_flush_interval + 0.1) do
          expect(events[0]).to match_path_and_line("en.log", lines["en.log"])
        end

        line_producer.call("fr.log")
        #next line(s) come before auto-flush i.e. assert its buffered
        sleep(auto_flush_interval - 0.3)
        expect(codec.buffer_size).to eq(3)
        expect(events.size).to eq(1)

        assert_produced_events("de.log", auto_flush_interval + 0.1) do
          # now the events are generated
          expect(events[1]).to match_path_and_line("fr.log", lines["fr.log"])
          expect(events[2]).to match_path_and_line("de.log", lines["de.log"])
        end
      end
    end

    context "mode: next, when there are pauses between multiline file writes" do

      let(:lines) do
        { "en.log" => ["hello world++", "second line++", "third line"],
          "fr.log" => ["Salut le Monde++", "deuxième ligne++", "troisième ligne"],
          "de.log" => ["Hallo Welt"] }
      end

      it "auto-flushes events from the accumulated lines to the queue" do
        config.update("patterns" => "\\+\\+$", "what" => "next", "negate" => false,
                      "auto_flush_interval" => auto_flush_interval)

        assert_produced_events("en.log", auto_flush_interval + 0.1) do
          # wait for auto_flush
          expect(events[0]).to match_path_and_line("en.log", lines["en.log"])
        end

        expect(codec).to have_an_empty_buffer

        assert_produced_events("de.log", auto_flush_interval - 0.3) do
          # this file is read before auto-flush, thus last event is not flushed yet
          # This differs from logstash-codec-multiline because of not emitting
          # last received event even if not matched
          expect(events.size).to eq(1)
        end

        codec.flush { |event| events << event } # flushing here releases the event
        expect(events.size).to eq(2)
        expect(events[1]).to match_path_and_line(nil, lines["de.log"]) # but path is not set when emitted by flush
        expect(codec).to have_an_empty_buffer

        assert_produced_events("fr.log", auto_flush_interval + 0.1) do
          # wait for auto_flush
          expect(events[2]).to match_path_and_line("fr.log", lines["fr.log"])
        end
      end
    end
  end
end
|