logstash-codec-joinlines 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/CONTRIBUTORS +19 -0
- data/DEVELOPER.md +3 -0
- data/Gemfile +2 -0
- data/LICENSE +11 -0
- data/README.md +86 -0
- data/lib/logstash/codecs/auto_flush.rb +48 -0
- data/lib/logstash/codecs/identity_map_codec.rb +347 -0
- data/lib/logstash/codecs/joinlines.rb +301 -0
- data/lib/logstash/codecs/retriggerable_task.rb +81 -0
- data/logstash-codec-joinlines.gemspec +28 -0
- data/spec/codecs/joinlines_spec.rb +435 -0
- data/spec/spec_helper.rb +140 -0
- metadata +132 -0
# encoding: utf-8
require "logstash/codecs/base"
require "logstash/util/charset"
require "logstash/timestamp"
require "logstash/codecs/auto_flush"

# The joinlines codec will join lines matching specified patterns.
# It is based on the multiline codec, but offers the opportunity to
# specify a list of patterns, whats and negates. The lists must be
# of equal length.
#
# IMPORTANT: If you are using a Logstash input plugin that supports multiple
# hosts, such as the <<plugins-inputs-beats>> input plugin, you should not use
# the joinlines codec to handle multiline events. Doing so may result in the
# mixing of streams and corrupted event data. In this situation, you need to
# handle multiline events before sending the event data to Logstash.
#
# Example usage
# [source,ruby]
# input {
#   stdin {
#     codec => joinlines {
#       patterns => [ "^The following message", "^\s*at" ]
#       what => [ "next", "previous" ]
#       negate => [ false, false ]
#     }
#   }
# }
#
# The example above will join lines starting with "The following message"
# with the next line, and stack traces with the previous line.
#
module LogStash module Codecs class Joinlines < LogStash::Codecs::Base

  # The codec name
  config_name "joinlines"

  # The patterns to recognize (grok expressions; one per join rule)
  config :patterns, :validate => :string, :list => true, :required => true

  # For each pattern: whether a matching line joins with the "previous"
  # or the "next" line. Parallel to :patterns.
  config :what, :validate => ["previous", "next"], :list => true, :required => true

  # Negate match? Parallel to :patterns.
  config :negate, :validate => :boolean, :list => true, :required => true

  # Logstash ships by default with a bunch of patterns, so you don't
  # necessarily need to define this yourself unless you are adding additional
  # patterns.
  #
  # Pattern files are plain text with format:
  # [source,ruby]
  #     NAME PATTERN
  #
  # For example:
  # [source,ruby]
  #     NUMBER \d+
  config :patterns_dir, :validate => :array, :default => []

  # The character encoding used in this input. Examples include `UTF-8`
  # and `cp1252`
  #
  # This setting is useful if your log files are in `Latin-1` (aka `cp1252`)
  # or in another character set other than `UTF-8`.
  #
  # This only affects "plain" format logs since JSON is `UTF-8` already.
  config :charset, :validate => ::Encoding.name_list, :default => "UTF-8"

  # Tag multiline events with a given tag. This tag will only be added
  # to events that actually have multiple lines in them.
  config :multiline_tag, :validate => :string, :default => "joinlines"

  # The accumulation of events can make logstash exit with an out of memory error
  # if event boundaries are not correctly defined. This setting makes sure to flush
  # multiline events after reaching a number of lines, it is used in combination
  # with max_bytes.
  config :max_lines, :validate => :number, :default => 500

  # The accumulation of events can make logstash exit with an out of memory error
  # if event boundaries are not correctly defined. This setting makes sure to flush
  # multiline events after reaching a number of bytes, it is used in combination
  # with max_lines.
  config :max_bytes, :validate => :bytes, :default => "10 MiB"

  # The accumulation of multiple lines will be converted to an event when either a
  # matching new line is seen or there has been no new data appended for this many
  # seconds. No default. If unset, no auto_flush. Units: seconds
  config :auto_flush_interval, :validate => :number

  public
  # Compiles one grok per configured pattern, resolves the per-pattern
  # "previous"/"next" handler methods, and initializes buffer, charset
  # converter and (optionally) the auto-flush timer.
  def register
    require "grok-pure" # rubygem 'jls-grok'
    require 'logstash/patterns/core'

    # `what` value of the rule currently being matched: "previous", "next"
    # or "" when the last line matched no rule.
    @matching = ""

    # Detect if we are running from a jarfile, pick the right path.
    patterns_path = []
    patterns_path += [LogStash::Patterns::Core.path]

    @patterns_dir = patterns_path.to_a + @patterns_dir
    @groks = []
    @handlers = []

    # NOTE(review): @patterns/@what/@negate are documented as equal-length
    # lists but their lengths are not validated here; a short @what list
    # would zip to nil entries.
    @patterns.zip(@what).each do |pattern,what|
      grok = Grok.new

      @patterns_dir.each do |path|
        if ::File.directory?(path)
          path = ::File.join(path, "*")
        end

        Dir.glob(path).each do |file|
          @logger.debug("Grok loading patterns from file", :path => file)
          grok.add_patterns_from_file(file)
        end
      end

      grok.compile(pattern)
      # Resolves to #do_next or #do_previous based on the parallel `what`.
      handler = method("do_#{what}".to_sym)

      @groks.push(grok)
      @handlers.push(handler)
    end

    @logger.trace("Registered joinlines plugin", :type => @type, :config => @config)
    reset_buffer

    @converter = LogStash::Util::Charset.new(@charset)
    @converter.logger = @logger

    if @auto_flush_interval
      # will start on first decode
      @auto_flush_runner = AutoFlush.new(self, @auto_flush_interval)
    end
  end # def register

  # Hands auto-flush scheduling over to an identity-map codec wrapper:
  # replaces the local runner with an inert one but keeps the interval.
  def use_mapper_auto_flush
    return unless auto_flush_active?
    @auto_flush_runner = AutoFlushUnset.new(nil, nil)
    @auto_flush_interval = @auto_flush_interval.to_f
  end

  # Listener-based entry point (identity-map style): decodes listener.data
  # and routes each produced event to the listener that owns it — events
  # closed by a "previous"-type boundary go to the prior listener.
  def accept(listener)
    # memoize references to listener that holds upstream state
    @previous_listener = @last_seen_listener || listener
    @last_seen_listener = listener

    internal_decode(listener.data) do |event,what|
      what_based_listener(what).process_event(event)
    end
  end

  # Per-rule tuples: [pattern, what, negate, grok, handler], all parallel.
  def zip_config
    @patterns.zip(@what, @negate, @groks, @handlers)
  end

  #private
  # Core join loop: splits `text` into lines, tests each line against every
  # configured rule (first match wins), flushes the pending group at join
  # boundaries, and yields each completed (event, what) pair.
  def internal_decode(text, &block)
    do_flush = false
    text = @converter.convert(text)
    text.split("\n").each do |line|
      matched = false
      zip_config.each do |pattern,what,negate,grok,handler|
        match = grok.match(line)
        @logger.debug("Joinlines", :pattern => pattern, :text => line,
                      :match => (match != false), :negate => negate)

        # Add negate option
        match = (match and !negate) || (!match and negate)

        if match
          # A "next" match opens a new group unless we are already inside
          # a run of "next" lines.
          do_flush = (what == "next" and @matching != "next")
          matched = true
          @matching = what
          break
        end
      end

      if !matched
        # An unmatched line closes the group, unless the previous line was
        # a "next" match that owns this line.
        do_flush = (@matching != "next")
        @matching = ""
      end

      if do_flush
        flush do |event|
          yield(event,@matching)
        end
        do_flush = false
      end

      auto_flush_runner.start
      buffer(line)
    end
  end

  public
  # Standard codec API: yields fully joined events, discarding the `what`
  # marker. The trailing group stays buffered until the next flush.
  def decode(text, &block)
    internal_decode(text) do |event,what|
      yield(event)
    end
  end # def decode

  # Appends a line to the pending group and tracks the buffered byte count.
  def buffer(text)
    @buffer_bytes += text.bytesize
    @buffer.push(text)
  end

  # Emits the buffered lines as a single merged event. If the downstream
  # block raises, the buffer is preserved so the data can be retried on a
  # later flush.
  def flush(&block)
    if block_given? && @buffer.any?
      no_error = true
      events = merge_events
      begin
        yield events
      rescue ::Exception => e
        # need to rescue everything
        # likeliest cause: backpressure or timeout by exception
        # can't really do anything but leave the data in the buffer for next time if there is one
        @logger.error("Joinlines: flush downstream error", :exception => e)
        no_error = false
      end
      reset_buffer if no_error
    end
  end

  # Timeout-driven flush; emits to the most recently seen listener.
  def auto_flush(listener = @last_seen_listener)
    return if listener.nil?

    flush do |event|
      listener.process_event(event)
    end
  end

  # Builds the merged event from the buffer and applies the join tag plus
  # limit-reached tags.
  # NOTE(review): NL and @time are not defined anywhere in this file —
  # presumably provided by LogStash::Codecs::Base or set elsewhere; confirm.
  def merge_events
    event = LogStash::Event.new(LogStash::Event::TIMESTAMP => @time, "message" => @buffer.join(NL))
    event.tag @multiline_tag if !@multiline_tag.empty? && @buffer.size > 1
    event.tag "joinlines_codec_max_bytes_reached" if over_maximum_bytes?
    event.tag "joinlines_codec_max_lines_reached" if over_maximum_lines?
    event
  end

  # Clears the pending group and its byte counter.
  def reset_buffer
    @buffer = []
    @buffer_bytes = 0
  end

  # True for any `what` value other than "next".
  def doing_previous?(what)
    what != "next"
  end

  # "previous"-bounded events belong to the earlier listener; everything
  # else goes to the latest one.
  def what_based_listener(what)
    doing_previous?(what) ? @previous_listener : @last_seen_listener
  end

  # Handler for what => "next": buffer first, flush when the run ends or
  # buffer limits are exceeded.
  # NOTE(review): registered via method("do_next") in #register but not
  # invoked from internal_decode in this version of the code.
  def do_next(text, matched, &block)
    buffer(text)
    auto_flush_runner.start
    flush(&block) if !matched || buffer_over_limits?
  end

  # Handler for what => "previous": flush the old group before buffering
  # the new line. See the note on #do_next — registered but not invoked here.
  def do_previous(text, matched, &block)
    flush(&block) if !matched || buffer_over_limits?
    auto_flush_runner.start
    buffer(text)
  end

  # True when the buffered line count exceeds max_lines.
  def over_maximum_lines?
    @buffer.size > @max_lines
  end

  # True when the buffered byte count reaches max_bytes.
  def over_maximum_bytes?
    @buffer_bytes >= @max_bytes
  end

  # Either limit tripping forces a flush of the pending group.
  def buffer_over_limits?
    over_maximum_lines? || over_maximum_bytes?
  end

  # Encoding is pass-through for this codec.
  def encode(event)
    # Nothing to do.
    @on_event.call(event, event)
  end # def encode

  # Stops the auto-flush timer (no-op when auto-flush was never enabled).
  def close
    auto_flush_runner.stop
  end

  def auto_flush_active?
    !@auto_flush_interval.nil?
  end

  # The active runner, or an inert placeholder when auto-flush is unset.
  def auto_flush_runner
    @auto_flush_runner || AutoFlushUnset.new(nil, nil)
  end

  # Ensure dup/clone get freshly registered state (own buffer, groks, timer)
  # instead of sharing the source codec's internals.
  def initialize_copy(source)
    super
    register
  end

end end end # class LogStash::Codecs::Joinlines
require "concurrent"

# A delayed one-shot task that can be re-armed ("retriggered") before it
# fires. A background thread counts down in SLEEP_FOR steps; when the
# countdown reaches zero the listener's #timeout is invoked.
# NOTE(review): presumably driven by the AutoFlush helper to implement the
# codec's auto_flush_interval — confirm against auto_flush.rb.
module LogStash module Codecs class RetriggerableTask
  # Countdown granularity in seconds; delays are counted in multiples of this.
  SLEEP_FOR = 0.25.freeze

  attr_reader :thread

  # delay    - seconds to wait before firing
  # listener - object that receives #timeout when the countdown expires
  def initialize(delay, listener)
    @count = calculate_count(delay)
    @listener = listener
    @counter = Concurrent::AtomicFixnum.new(0 + @count)
    @stopped = Concurrent::AtomicBoolean.new(false)
    # Single permit; lets #retrigger wait out an in-flight timeout callback.
    @semaphore = Concurrent::Semaphore.new(1)
  end

  # Re-arm the countdown: reset it if a timer is still pending, otherwise
  # start a fresh timer thread. If the timeout callback is currently
  # executing, block on the semaphore until #start releases it.
  def retrigger
    return if stopped?
    if executing?
      @semaphore.acquire
    end

    if pending?
      reset_counter
    else
      start
    end
  end

  # Permanently stop; a live timer thread exits without firing.
  def close
    @stopped.make_true
  end

  # Current countdown value (number of SLEEP_FOR ticks remaining).
  def counter
    @counter.value
  end

  # Timer thread alive and countdown elapsed — i.e. in the callback phase.
  def executing?
    running? && counter < 1
  end

  # Timer thread alive and still counting down.
  def pending?
    running? && counter > 0
  end

  private

  def calculate_count(value)
    # in multiples of SLEEP_FOR (0.25) seconds
    # if delay is 10 seconds then count is 40
    # this only works when SLEEP_FOR is less than 1
    return 1 if value < SLEEP_FOR
    (value / SLEEP_FOR).floor
  end

  # Restore the countdown to its full configured tick count.
  def reset_counter
    @counter.value = 0 + @count
  end

  def running?
    @thread && @thread.alive?
  end

  # Spawn the countdown thread. After the countdown (or early exit via
  # #close) the listener is notified; drain_permits/release around the
  # callback let a concurrent #retrigger wait for it to finish.
  def start()
    reset_counter
    @thread = Thread.new do
      while counter > 0
        break if stopped?
        sleep SLEEP_FOR
        @counter.decrement
      end

      @semaphore.drain_permits
      @listener.timeout if !stopped?
      @semaphore.release
    end
  end

  def stopped?
    @stopped.value
  end
end end end
|
# Gem packaging manifest for the logstash-codec-joinlines plugin.
Gem::Specification.new do |spec|
  # Identity and description as shown on the registry.
  spec.name          = 'logstash-codec-joinlines'
  spec.version       = '0.1.0'
  spec.licenses      = ['Apache-2.0']
  spec.summary       = 'Merges multiline messages into a single event, allowing for multiple patterns.'
  spec.description   = 'This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program'
  spec.homepage      = 'https://github.com/lovmoen/logstash-codec-joinlines'
  spec.authors       = ['Svein L. Ellingsen (lovmoen)']
  spec.email         = 'lovmoen@gmail.com'
  spec.require_paths = ['lib']

  # Files shipped in the gem, and the subset treated as tests.
  spec.files      = Dir['lib/**/*', 'spec/**/*', 'vendor/**/*', '*.gemspec', '*.md', 'CONTRIBUTORS', 'Gemfile', 'LICENSE', 'NOTICE.TXT']
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  spec.metadata = { "logstash_plugin" => "true", "logstash_group" => "codec" }

  # Runtime dependencies.
  spec.add_runtime_dependency 'logstash-core-plugin-api', "~> 2.0"
  spec.add_runtime_dependency 'logstash-codec-line'
  spec.add_runtime_dependency 'logstash-patterns-core'
  spec.add_runtime_dependency 'jls-grok', '~> 0.11.1'

  # Development-only dependencies.
  spec.add_development_dependency 'logstash-devutils'
end
# encoding: utf-8
require "logstash/devutils/rspec/spec_helper"
require "logstash/codecs/joinlines"
require "logstash/event"
require "insist"
require_relative '../spec_helper'

# above helper also defines a subclass of Joinlines
# called JoinlinesRspec that exposes the internal buffer
# and a Logger Mock

describe LogStash::Codecs::Joinlines do
  # Exercises the multi-rule behavior: several parallel patterns/whats/negates.
  context "#multipatterns" do
    let(:config) { {"patterns" => "", "what" => "next", "negate" => false} }
    let(:codec) { LogStash::Codecs::Joinlines.new(config).tap {|c| c.register } }
    let(:events) { [] }
    # Feeds each line through codec.decode and collects emitted events.
    let(:line_producer) do
      lambda do |lines|
        lines.each do |line|
          codec.decode(line) do |event|
            events << event
          end
        end
      end
    end

    it "should internally decode lines to (event, what) pairs" do
      config.update("patterns" => ["^\\s", "next"], "what" => ["previous", "next"], "negate" => [false, false])
      text = "hello world\n second line\nanother first line\nnext\nowns previous\n"

      # Local `events` intentionally shadows the let; this test collects the
      # (event, what) pairs from internal_decode directly.
      events = []
      whats = []
      codec.internal_decode(text) do |event,what|
        events.push(event)
        whats.push(what)
      end

      # Must flush to get last event
      codec.flush do |event|
        events.push(event)
        whats.push("final") # dummy
      end

      expect(events.size).to eq(3)
      expect(whats.size).to eq(3)
      expect(events[0].get("message")).to eq("hello world\n second line")
      expect(events[1].get("message")).to eq("another first line")
      expect(events[2].get("message")).to eq("next\nowns previous")
    end

    it "should break between consecutive previous and next" do
      config.update("patterns" => ["^\\s", "next"], "what" => ["previous", "next"], "negate" => [false, false])
      lines = [ "hello world", " second line", "next", "owns previous" ]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq(2)
      expect(events[0].get("message")).to eq "hello world\n second line"
      expect(events[0].get("tags")).to include("joinlines")
      expect(events[1].get("message")).to eq "next\nowns previous"
      expect(events[1].get("tags")).to include("joinlines")
    end

    it "should stitch together consecutive next and previous" do
      config.update("patterns" => ["^\\s", "next"], "what" => ["previous", "next"], "negate" => [false, false])
      lines = [ "next", "owns previous and next", " second line", "another first" ]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq(2)
      expect(events[0].get("message")).to eq "next\nowns previous and next\n second line"
      expect(events[0].get("tags")).to include("joinlines")
      expect(events[1].get("message")).to eq "another first"
      expect(events[1].get("tags")).to be_nil
    end
  end

  # Single-rule behavior mirroring the stock multiline codec.
  context "#decode" do
    let(:config) { {"patterns" => "", "what" => "next", "negate" => false} }
    let(:codec) { LogStash::Codecs::Joinlines.new(config).tap {|c| c.register } }
    let(:events) { [] }
    let(:line_producer) do
      lambda do |lines|
        lines.each do |line|
          codec.decode(line) do |event|
            events << event
          end
        end
      end
    end

    it "should be able to handle multiline events with additional lines space-indented" do
      config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
      lines = [ "hello world", " second line", "another first line" ]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq(2)
      expect(events[0].get("message")).to eq "hello world\n second line"
      expect(events[0].get("tags")).to include("joinlines")
      expect(events[1].get("message")).to eq "another first line"
      expect(events[1].get("tags")).to be_nil
    end

    it "should allow custom tag added to multiline events" do
      config.update("patterns" => "^\\s", "what" => "previous", "negate" => false, "multiline_tag" => "hurray")
      lines = [ "hello world", " second line", "another first line" ]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq 2
      expect(events[0].get("tags")).to include("hurray")
      expect(events[1].get("tags")).to be_nil
    end

    it "should handle new lines in messages" do
      config.update("patterns" => '\D', "what" => "previous", "negate" => false)
      lineio = StringIO.new("1234567890\nA234567890\nB234567890\n0987654321\n")
      until lineio.eof
        line = lineio.read(256) #when this is set to 36 the tests fail
        codec.decode(line) {|evt| events.push(evt)}
      end
      codec.flush { |e| events << e }
      expect(events[0].get("message")).to eq "1234567890\nA234567890\nB234567890"
      expect(events[1].get("message")).to eq "0987654321"
    end

    it "should allow grok patterns to be used" do
      config.update(
        "patterns" => "^%{NUMBER} %{TIME}",
        "negate" => true,
        "what" => "previous"
      )

      lines = [ "120913 12:04:33 first line", "second line", "third line" ]

      line_producer.call(lines)
      codec.flush { |e| events << e }

      insist { events.size } == 1
      insist { events.first.get("message") } == lines.join("\n")
    end

    context "using default UTF-8 charset" do

      it "should decode valid UTF-8 input" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
        lines = [ "foobar", "κόσμε" ]
        lines.each do |line|
          expect(line.encoding.name).to eq "UTF-8"
          expect(line.valid_encoding?).to be_truthy
          codec.decode(line) { |event| events << event }
        end

        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(lines).each do |tuple|
          expect(tuple[0].get("message")).to eq tuple[1]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end

      it "should escape invalid sequences" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
        # Deliberately malformed UTF-8 byte sequences.
        lines = [ "foo \xED\xB9\x81\xC3", "bar \xAD" ]
        lines.each do |line|
          expect(line.encoding.name).to eq "UTF-8"
          expect(line.valid_encoding?).to eq false

          codec.decode(line) { |event| events << event }
        end
        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(lines).each do |tuple|
          # The converter escapes invalid bytes; inspect[1..-2] strips quotes.
          expect(tuple[0].get("message")).to eq tuple[1].inspect[1..-2]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end

      it "decodes and joins multiple patterns" do
        config.update("patterns" => [ "^\\s", "^the following" ], "what" => [ "previous", "next" ], "negate" => [ false, false] )
        lines = [ "hello world", " second line", "another first line", "the following message belongs to next", "I own the previous", "Another first" ]

        lines.each do |line|
          codec.decode(line) do |event|
            events << event
          end
        end

        codec.flush { |e| events << e }

        #expect(events.size).to eq(4)
        expect(events[0].get("message")).to eq "hello world\n second line"
        expect(events[0].get("tags")).to include("joinlines")
        expect(events[1].get("message")).to eq "another first line"
        expect(events[1].get("tags")).to be_nil
        expect(events[2].get("message")).to eq "the following message belongs to next\nI own the previous"
        expect(events[2].get("tags")).to include("joinlines")
        expect(events[3].get("message")).to eq "Another first"
        expect(events[3].get("tags")).to be_nil
      end
    end


    context "with valid non UTF-8 source encoding" do

      it "should encode to UTF-8" do
        config.update("charset" => "ISO-8859-1", "patterns" => "^\\s", "what" => "previous", "negate" => false)
        # Pairs of [raw ISO-8859-1 input, expected UTF-8 output].
        samples = [
          ["foobar", "foobar"],
          ["\xE0 Montr\xE9al", "à Montréal"],
        ]

        # lines = [ "foo \xED\xB9\x81\xC3", "bar \xAD" ]
        samples.map{|(a, b)| a.force_encoding("ISO-8859-1")}.each do |line|
          expect(line.encoding.name).to eq "ISO-8859-1"
          expect(line.valid_encoding?).to eq true

          codec.decode(line) { |event| events << event }
        end
        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(samples.map{|(a, b)| b}).each do |tuple|
          expect(tuple[1].encoding.name).to eq "UTF-8"
          expect(tuple[0].get("message")).to eq tuple[1]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end
    end

    context "with invalid non UTF-8 source encoding" do

      it "should encode to UTF-8" do
        config.update("charset" => "ASCII-8BIT", "patterns" => "^\\s", "what" => "previous", "negate" => false)
        # Bytes that cannot be transcoded become U+FFFD replacement chars.
        samples = [
          ["\xE0 Montr\xE9al", "� Montr�al"],
          ["\xCE\xBA\xCF\x8C\xCF\x83\xCE\xBC\xCE\xB5", "����������"],
        ]
        # Local shadowing of the `events` let, as in the original.
        events = []
        samples.map{|(a, b)| a.force_encoding("ASCII-8BIT")}.each do |line|
          expect(line.encoding.name).to eq "ASCII-8BIT"
          expect(line.valid_encoding?).to eq true

          codec.decode(line) { |event| events << event }
        end
        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(samples.map{|(a, b)| b}).each do |tuple|
          expect(tuple[1].encoding.name).to eq "UTF-8"
          expect(tuple[0].get("message")).to eq tuple[1]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end

    end
  end

  # Limit handling: max_lines / max_bytes force flushes of open groups.
  # `decode_events` / `options` come from the shared spec_helper.
  context "with non closed multiline events" do
    let(:random_number_of_events) { rand(300..1000) }
    let(:sample_event) { "- Sample event" }
    let(:events) { decode_events }
    let(:unmerged_events_count) { events.collect { |event| event.get("message").split(LogStash::Codecs::Joinlines::NL).size }.inject(&:+) }

    context "break on maximum_lines" do
      let(:max_lines) { rand(10..100) }
      let(:options) {
        {
          "patterns" => "^-",
          "what" => "previous",
          "negate" => false,
          "max_lines" => max_lines,
          "max_bytes" => "2 mb"
        }
      }

      it "flushes on a maximum lines" do
        expect(unmerged_events_count).to eq(random_number_of_events)
      end

      it "tags the event" do
        expect(events.first.get("tags")).to include("joinlines_codec_max_lines_reached")
      end
    end

    context "break on maximum bytes" do
      let(:max_bytes) { rand(30..100) }
      let(:options) {
        {
          "patterns" => "^-",
          "what" => "previous",
          "negate" => false,
          "max_lines" => 20000,
          "max_bytes" => max_bytes
        }
      }

      it "flushes on a maximum bytes size" do
        expect(unmerged_events_count).to eq(random_number_of_events)
      end

      it "tags the event" do
        expect(events.first.get("tags")).to include("joinlines_codec_max_bytes_reached")
      end
    end
  end

  # Timing-based tests for auto_flush_interval; relies on the Jlc helpers
  # (listener, codec subclass, log tracer) from spec_helper.
  describe "auto flushing" do
    let(:config) { {"patterns" => "", "what" => "next", "negate" => false} }
    let(:events) { [] }
    let(:lines) do
      { "en.log" => ["hello world", " second line", " third line"],
        "fr.log" => ["Salut le Monde", " deuxième ligne", " troisième ligne"],
        "de.log" => ["Hallo Welt"] }
    end
    let(:listener_class) { Jlc::LineListener }
    let(:auto_flush_interval) { 2 }

    let(:line_producer) do
      lambda do |path|
        #create a listener that holds upstream state
        listener = listener_class.new(events, codec, path)
        lines[path].each do |data|
          listener.accept(data)
        end
      end
    end

    let(:codec) do
      Jlc::JoinlinesRspec.new(config).tap {|c| c.register}
    end

    before :each do
      expect(LogStash::Codecs::Joinlines).to receive(:logger).and_return(Jlc::JoinlinesLogTracer.new).at_least(:once)
    end

    context "when auto_flush_interval is not set" do
      it "does not build any events" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
        line_producer.call("en.log")
        sleep auto_flush_interval + 0.1
        expect(events.size).to eq(0)
        expect(codec.buffer_size).to eq(3)
      end
    end

    context "when the auto_flush raises an exception" do
      let(:errmsg) { "OMG, Daleks!" }
      let(:listener_class) { Jlc::LineErrorListener }

      it "does not build any events, logs an error and the buffer data remains" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false,
          "auto_flush_interval" => auto_flush_interval)
        line_producer.call("en.log")
        sleep(auto_flush_interval + 0.2)
        msg, args = codec.logger.trace_for(:error)
        expect(msg).to eq("Joinlines: flush downstream error")
        expect(args[:exception].message).to eq(errmsg)
        expect(events.size).to eq(0)
        expect(codec.buffer_size).to eq(3)
      end
    end

    # Feeds the lines for `key`, waits `sleeping` seconds, then runs the
    # caller's assertions.
    def assert_produced_events(key, sleeping)
      line_producer.call(key)
      sleep(sleeping)
      yield
      #expect(codec).to have_an_empty_buffer
    end

    context "mode: previous, when there are pauses between multiline file writes" do
      it "auto-flushes events from the accumulated lines to the queue" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false,
          "auto_flush_interval" => auto_flush_interval)

        assert_produced_events("en.log", auto_flush_interval + 0.1) do
          expect(events[0]).to match_path_and_line("en.log", lines["en.log"])
        end

        line_producer.call("fr.log")
        #next line(s) come before auto-flush i.e. assert its buffered
        sleep(auto_flush_interval - 0.3)
        expect(codec.buffer_size).to eq(3)
        expect(events.size).to eq(1)

        assert_produced_events("de.log", auto_flush_interval + 0.1) do
          # now the events are generated
          expect(events[1]).to match_path_and_line("fr.log", lines["fr.log"])
          expect(events[2]).to match_path_and_line("de.log", lines["de.log"])
        end
      end
    end

    context "mode: next, when there are pauses between multiline file writes" do

      let(:lines) do
        { "en.log" => ["hello world++", "second line++", "third line"],
          "fr.log" => ["Salut le Monde++", "deuxième ligne++", "troisième ligne"],
          "de.log" => ["Hallo Welt"] }
      end

      it "auto-flushes events from the accumulated lines to the queue" do
        config.update("patterns" => "\\+\\+$", "what" => "next", "negate" => false,
          "auto_flush_interval" => auto_flush_interval)

        assert_produced_events("en.log", auto_flush_interval + 0.1) do
          # wait for auto_flush
          expect(events[0]).to match_path_and_line("en.log", lines["en.log"])
        end

        expect(codec).to have_an_empty_buffer

        assert_produced_events("de.log", auto_flush_interval - 0.3) do
          # this file is read before auto-flush, thus last event is not flushed yet
          # This differs from logstash-codec-multiline because of not emitting
          # last received event even if not matched
          expect(events.size).to eq(1)
        end

        codec.flush { |event| events << event } # flushing here releases the event
        expect(events.size).to eq(2)
        expect(events[1]).to match_path_and_line(nil, lines["de.log"]) # but path is not set when emitted by flush
        expect(codec).to have_an_empty_buffer

        assert_produced_events("fr.log", auto_flush_interval + 0.1) do
          # wait for auto_flush
          expect(events[2]).to match_path_and_line("fr.log", lines["fr.log"])
        end
      end
    end
  end
end