logstash-filter-kv 4.2.1 → 4.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/docs/index.asciidoc +35 -0
- data/lib/logstash/filters/kv.rb +159 -13
- data/logstash-filter-kv.gemspec +1 -1
- data/spec/filters/kv_spec.rb +126 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c8b4c710387a809508f0e21af10fbaa40ba7e5d5767ed7a104b56445c98eef97
|
4
|
+
data.tar.gz: 717b1f7c691c20ff4bcf0faa009dee6e15246915542872625c91d3499a51910a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 732251129f69923ea68133b38d6aa5e346b67d124903f3c3e98a113f6b508aebe498fbe1329b4dc44589600d8423a442331b70cc4d8dfe6d51cbbd3c93bf58fd
|
7
|
+
data.tar.gz: 9f348a9d4318ab77337292b8fbfe17d717cce0bd2e192733f835cd943bc21a00ccb672ba26ec812470b77a8d620caa3a922b9056cf6e1f9b82389e7936d82bb7
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## 4.3.0
|
2
|
+
- Added a timeout enforcer which prevents inputs that are pathological against the generated parser from blocking
|
3
|
+
the pipeline. By default, timeout is a generous 30s, but can be configured or disabled entirely with the new
|
4
|
+
`timeout_millis` and `tag_on_timeout` directives ([#79](https://github.com/logstash-plugins/logstash-filter-kv/pull/79))
|
5
|
+
- Made error handling configurable with the new `tag_on_failure` directive.
|
6
|
+
|
1
7
|
## 4.2.1
|
2
8
|
- Fixes performance regression introduced in 4.1.0 ([#70](https://github.com/logstash-plugins/logstash-filter-kv/issues/70))
|
3
9
|
|
data/docs/index.asciidoc
CHANGED
@@ -65,6 +65,9 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
65
65
|
| <<plugins-{type}s-{plugin}-remove_char_value>> |<<string,string>>|No
|
66
66
|
| <<plugins-{type}s-{plugin}-source>> |<<string,string>>|No
|
67
67
|
| <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
|
68
|
+
| <<plugins-{type}s-{plugin}-tag_on_failure>> |<<string,string>>|No
|
69
|
+
| <<plugins-{type}s-{plugin}-tag_on_timeout>> |<<string,string>>|No
|
70
|
+
| <<plugins-{type}s-{plugin}-timeout_millis>> |<<number,number>>|No
|
68
71
|
| <<plugins-{type}s-{plugin}-transform_key>> |<<string,string>>, one of `["lowercase", "uppercase", "capitalize"]`|No
|
69
72
|
| <<plugins-{type}s-{plugin}-transform_value>> |<<string,string>>, one of `["lowercase", "uppercase", "capitalize"]`|No
|
70
73
|
| <<plugins-{type}s-{plugin}-trim_key>> |<<string,string>>|No
|
@@ -335,6 +338,38 @@ For example, to place all keys into the event field kv:
|
|
335
338
|
[source,ruby]
|
336
339
|
filter { kv { target => "kv" } }
|
337
340
|
|
341
|
+
[id="plugins-{type}s-{plugin}-tag_on_failure"]
|
342
|
+
===== `tag_on_failure`
|
343
|
+
|
344
|
+
* Value type is <<string,string>>
|
345
|
+
* The default value for this setting is `_kv_filter_error`.
|
346
|
+
|
347
|
+
When a kv operation causes a runtime exception to be thrown within the plugin,
|
348
|
+
the operation is safely aborted without crashing the plugin, and the event is
|
349
|
+
tagged with the provided value.
|
350
|
+
|
351
|
+
[id="plugins-{type}s-{plugin}-tag_on_timeout"]
|
352
|
+
===== `tag_on_timeout`
|
353
|
+
|
354
|
+
* Value type is <<string,string>>
|
355
|
+
* The default value for this setting is `_kv_filter_timeout`.
|
356
|
+
|
357
|
+
When timeouts are enabled and a kv operation is aborted, the event is tagged
|
358
|
+
with the provided value (see: <<plugins-{type}s-{plugin}-timeout_millis>>).
|
359
|
+
|
360
|
+
[id="plugins-{type}s-{plugin}-timeout_millis"]
|
361
|
+
===== `timeout_millis`
|
362
|
+
|
363
|
+
* Value type is <<number,number>>
|
364
|
+
* The default value for this setting is 30000 (30 seconds).
|
365
|
+
* Set to zero (`0`) to disable timeouts.
|
366
|
+
|
367
|
+
Timeouts provide a safeguard against inputs that are pathological against the
|
368
|
+
regular expressions that are used to extract key/value pairs. When parsing an
|
369
|
+
event exceeds this threshold the operation is aborted and the event is tagged
|
370
|
+
in order to prevent the operation from blocking the pipeline
|
371
|
+
(see: <<plugins-{type}s-{plugin}-tag_on_timeout>>).
|
372
|
+
|
338
373
|
[id="plugins-{type}s-{plugin}-transform_key"]
|
339
374
|
===== `transform_key`
|
340
375
|
|
data/lib/logstash/filters/kv.rb
CHANGED
@@ -317,6 +317,19 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
317
317
|
#
|
318
318
|
config :whitespace, :validate => %w(strict lenient), :default => "lenient"
|
319
319
|
|
320
|
+
# Attempt to terminate regexps after this amount of time.
|
321
|
+
# This applies per source field value if event has multiple values in the source field.
|
322
|
+
# This will never time out early, but may take a little longer to time out.
|
323
|
+
# Actual timeout is approximate based on a 250ms quantization.
|
324
|
+
# Set to 0 to disable timeouts
|
325
|
+
config :timeout_millis, :validate => :number, :default => 30_000
|
326
|
+
|
327
|
+
# Tag to apply if a kv regexp times out.
|
328
|
+
config :tag_on_timeout, :validate => :string, :default => '_kv_filter_timeout'
|
329
|
+
|
330
|
+
# Tag to apply if the kv filter raises an exception while parsing.
|
331
|
+
config :tag_on_failure, :validate => :string, :default => '_kv_filter_error'
|
332
|
+
|
320
333
|
def register
|
321
334
|
if @value_split.empty?
|
322
335
|
raise LogStash::ConfigurationError, I18n.t(
|
@@ -392,21 +405,26 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
392
405
|
@value_split_re = value_split_pattern
|
393
406
|
|
394
407
|
@logger.debug? && @logger.debug("KV scan regex", :regex => @scan_re.inspect)
|
408
|
+
|
409
|
+
@timeout_enforcer = initialize_timeout_enforcer
|
410
|
+
@timeout_enforcer.start!
|
395
411
|
end
|
396
412
|
|
397
413
|
def filter(event)
|
398
414
|
kv = Hash.new
|
399
415
|
value = event.get(@source)
|
400
416
|
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
417
|
+
@timeout_enforcer.execute do
|
418
|
+
case value
|
419
|
+
when nil
|
420
|
+
# Nothing to do
|
421
|
+
when String
|
422
|
+
parse(value, event, kv)
|
423
|
+
when Array
|
424
|
+
value.each { |v| parse(v, event, kv) }
|
425
|
+
else
|
426
|
+
@logger.warn("kv filter has no support for this type of data", :type => value.class, :value => value)
|
427
|
+
end
|
410
428
|
end
|
411
429
|
|
412
430
|
# Add default key-values for missing keys
|
@@ -422,15 +440,47 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
422
440
|
end
|
423
441
|
|
424
442
|
filter_matched(event)
|
443
|
+
|
444
|
+
rescue TimeoutException => e
|
445
|
+
logger.warn("Timeout reached in KV filter with value #{summarize(value)}")
|
446
|
+
event.tag(@tag_on_timeout)
|
425
447
|
rescue => ex
|
426
448
|
meta = { :exception => ex.message }
|
427
449
|
meta[:backtrace] = ex.backtrace if logger.debug?
|
428
450
|
logger.warn('Exception while parsing KV', meta)
|
429
|
-
event.tag(
|
451
|
+
event.tag(@tag_on_failure)
|
452
|
+
end
|
453
|
+
|
454
|
+
def close
|
455
|
+
@timeout_enforcer.stop!
|
430
456
|
end
|
431
457
|
|
432
458
|
private
|
433
459
|
|
460
|
+
# @overload summarize(value)
|
461
|
+
# @param value [Array]
|
462
|
+
# @return [String]
|
463
|
+
# @overload summarize(value)
|
464
|
+
# @param value [String]
|
465
|
+
# @return [String]
|
466
|
+
def summarize(value)
|
467
|
+
if value.kind_of?(Array)
|
468
|
+
value.map(&:to_s).map do |entry|
|
469
|
+
summarize(entry)
|
470
|
+
end.to_s
|
471
|
+
end
|
472
|
+
|
473
|
+
value = value.to_s
|
474
|
+
|
475
|
+
value.bytesize < 255 ? "`#{value}`" : "entry too large; first 255 chars are `#{value[0..255].dump}`"
|
476
|
+
end
|
477
|
+
|
478
|
+
def initialize_timeout_enforcer
|
479
|
+
return NULL_TIMEOUT_ENFORCER if @timeout_millis <= 0
|
480
|
+
|
481
|
+
TimeoutEnforcer.new(logger, @timeout_millis * 1_000_000)
|
482
|
+
end
|
483
|
+
|
434
484
|
def has_value_splitter?(s)
|
435
485
|
s =~ @value_split_re
|
436
486
|
end
|
@@ -487,9 +537,16 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
487
537
|
end
|
488
538
|
end
|
489
539
|
|
540
|
+
# Parses the given `text`, using the `event` for context, into the provided `kv_keys` hash
|
541
|
+
#
|
542
|
+
# @param text [String]: the text to parse
|
543
|
+
# @param event [LogStash::Event]: the event from which to extract context (e.g., sprintf vs (in|ex)clude keys)
|
544
|
+
# @param kv_keys [Hash{String=>Object}]: the hash in which to inject found key/value pairs
|
545
|
+
#
|
546
|
+
# @return [void]
|
490
547
|
def parse(text, event, kv_keys)
|
491
548
|
# short circuit parsing if the text does not contain the @value_split
|
492
|
-
return
|
549
|
+
return unless has_value_splitter?(text)
|
493
550
|
|
494
551
|
# Interpret dynamic keys for @include_keys and @exclude_keys
|
495
552
|
include_keys = @include_keys.map{|key| event.sprintf(key)}
|
@@ -520,7 +577,8 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
520
577
|
|
521
578
|
# recursively get more kv pairs from the value
|
522
579
|
if @recursive
|
523
|
-
innerKv =
|
580
|
+
innerKv = {}
|
581
|
+
parse(value, event, innerKv)
|
524
582
|
value = innerKv unless innerKv.empty?
|
525
583
|
end
|
526
584
|
|
@@ -534,7 +592,95 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
534
592
|
kv_keys[key] = value
|
535
593
|
end
|
536
594
|
end
|
595
|
+
end
|
596
|
+
|
597
|
+
class TimeoutException < RuntimeError
|
598
|
+
end
|
599
|
+
|
600
|
+
class TimeoutEnforcer
|
601
|
+
def initialize(logger, timeout_nanos)
|
602
|
+
@logger = logger
|
603
|
+
@running = java.util.concurrent.atomic.AtomicBoolean.new(false)
|
604
|
+
@timeout_nanos = timeout_nanos
|
537
605
|
|
538
|
-
|
606
|
+
# Stores running matches with their start time; this is used to cancel long-running matches.
|
607
|
+
# Is a map of Thread => start_time
|
608
|
+
@threads_to_start_time = java.util.concurrent.ConcurrentHashMap.new
|
609
|
+
end
|
610
|
+
|
611
|
+
def execute(&block)
|
612
|
+
begin
|
613
|
+
thread = java.lang.Thread.currentThread()
|
614
|
+
@threads_to_start_time.put(thread, java.lang.System.nanoTime)
|
615
|
+
|
616
|
+
yield
|
617
|
+
|
618
|
+
rescue InterruptedRegexpError, java.lang.InterruptedException => e
|
619
|
+
raise TimeoutException.new
|
620
|
+
ensure
|
621
|
+
# If the block finished, but interrupt was called after, we'll want to
|
622
|
+
# clear the interrupted status anyway
|
623
|
+
@threads_to_start_time.remove(thread)
|
624
|
+
thread.interrupted
|
625
|
+
end
|
626
|
+
end
|
627
|
+
|
628
|
+
def start!
|
629
|
+
@running.set(true)
|
630
|
+
@logger.debug("Starting timeout enforcer (#{@timeout_nanos}ns)")
|
631
|
+
@timer_thread = Thread.new do
|
632
|
+
while @running.get()
|
633
|
+
begin
|
634
|
+
cancel_timed_out!
|
635
|
+
rescue Exception => e
|
636
|
+
@logger.error("Error while attempting to check/cancel excessively long kv patterns",
|
637
|
+
:message => e.message,
|
638
|
+
:class => e.class.name,
|
639
|
+
:backtrace => e.backtrace
|
640
|
+
)
|
641
|
+
end
|
642
|
+
sleep 0.25
|
643
|
+
end
|
644
|
+
end
|
645
|
+
end
|
646
|
+
|
647
|
+
def stop!
|
648
|
+
@running.set(false)
|
649
|
+
@logger.debug("Shutting down timeout enforcer")
|
650
|
+
# Check for the thread mostly for a fast start/shutdown scenario
|
651
|
+
@timer_thread.join if @timer_thread
|
652
|
+
end
|
653
|
+
|
654
|
+
private
|
655
|
+
|
656
|
+
def cancel_timed_out!
|
657
|
+
now = java.lang.System.nanoTime # save ourselves some nanotime calls
|
658
|
+
@threads_to_start_time.keySet.each do |thread|
|
659
|
+
# Use computeIfPresent so the check-and-remove of this entry happens atomically
|
660
|
+
@threads_to_start_time.computeIfPresent(thread) do |thread, start_time|
|
661
|
+
if start_time < now && now - start_time > @timeout_nanos
|
662
|
+
thread.interrupt
|
663
|
+
nil # Delete the key
|
664
|
+
else
|
665
|
+
start_time # preserve the key
|
666
|
+
end
|
667
|
+
end
|
668
|
+
end
|
669
|
+
end
|
670
|
+
end
|
671
|
+
|
672
|
+
class NullTimeoutEnforcer
|
673
|
+
def execute(&block)
|
674
|
+
yield
|
675
|
+
end
|
676
|
+
|
677
|
+
def start!
|
678
|
+
# no-op
|
679
|
+
end
|
680
|
+
|
681
|
+
def stop!
|
682
|
+
# no-op
|
683
|
+
end
|
539
684
|
end
|
685
|
+
NULL_TIMEOUT_ENFORCER = NullTimeoutEnforcer.new
|
540
686
|
end
|
data/logstash-filter-kv.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-filter-kv'
|
4
|
-
s.version = '4.2.1'
|
4
|
+
s.version = '4.3.0'
|
5
5
|
s.licenses = ['Apache License (2.0)']
|
6
6
|
s.summary = "Parses key-value pairs"
|
7
7
|
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
|
data/spec/filters/kv_spec.rb
CHANGED
@@ -1093,5 +1093,131 @@ context 'runtime errors' do
|
|
1093
1093
|
|
1094
1094
|
plugin.filter(event)
|
1095
1095
|
end
|
1096
|
+
context 'when a custom tag is defined' do
|
1097
|
+
let(:options) { super().merge("tag_on_failure" => "KV-ERROR")}
|
1098
|
+
it 'tags the event with the custom tag' do
|
1099
|
+
plugin.filter(event)
|
1100
|
+
expect(event.get('tags')).to_not be_nil
|
1101
|
+
expect(event.get('tags')).to include('KV-ERROR')
|
1102
|
+
end
|
1103
|
+
end
|
1104
|
+
end
|
1105
|
+
end
|
1106
|
+
|
1107
|
+
# This group intentionally uses patterns that are vulnerable to pathological inputs to test timeouts.
|
1108
|
+
#
|
1109
|
+
# patterns of the form `/(?:x+x+)+y/` are vulnerable to inputs that have long sequences matching `/x/`
|
1110
|
+
# that are _not_ followed by a sequence matching `/y/`.
|
1111
|
+
context 'timeouts' do
|
1112
|
+
let(:options) do
|
1113
|
+
{
|
1114
|
+
"value_split_pattern" => "(?:=+=+)+:"
|
1115
|
+
}
|
1116
|
+
end
|
1117
|
+
subject(:plugin) do
|
1118
|
+
LogStash::Filters::KV.new(options).instance_exec { register; self }
|
1119
|
+
end
|
1120
|
+
|
1121
|
+
let(:data) { {"message" => message} }
|
1122
|
+
let(:event) { LogStash::Event.new(data) }
|
1123
|
+
let(:message) { "foo=bar hello=world" }
|
1124
|
+
|
1125
|
+
after(:each) { plugin.close }
|
1126
|
+
|
1127
|
+
# since we are dealing with potentially-pathological specs, ensure specs fail in a timely
|
1128
|
+
# manner if they block for longer than `spec_blocking_threshold_seconds`.
|
1129
|
+
let(:spec_blocking_threshold_seconds) { 10 }
|
1130
|
+
around(:each) do |example|
|
1131
|
+
begin
|
1132
|
+
blocking_exception_class = Class.new(::Exception) # avoid RuntimeError, which is handled in KV#filter
|
1133
|
+
Timeout.timeout(spec_blocking_threshold_seconds, blocking_exception_class, &example)
|
1134
|
+
rescue blocking_exception_class
|
1135
|
+
fail('execution blocked')
|
1136
|
+
end
|
1137
|
+
end
|
1138
|
+
|
1139
|
+
context 'when timeouts are enabled' do
|
1140
|
+
let(:options) { super().merge("timeout_millis" => 250) }
|
1141
|
+
let(:spec_blocking_threshold_seconds) { 3 }
|
1142
|
+
|
1143
|
+
context 'when given a pathological input' do
|
1144
|
+
let(:message) { "foo========:bar baz================================================bingo" }
|
1145
|
+
|
1146
|
+
it 'tags the event' do
|
1147
|
+
plugin.filter(event)
|
1148
|
+
|
1149
|
+
expect(event.get('tags')).to be_a_kind_of(Enumerable)
|
1150
|
+
expect(event.get('tags')).to include('_kv_filter_timeout')
|
1151
|
+
end
|
1152
|
+
|
1153
|
+
context 'when given a custom `tag_on_timeout`' do
|
1154
|
+
let(:options) { super().merge('tag_on_timeout' => 'BADKV') }
|
1155
|
+
|
1156
|
+
it 'tags the event with the custom tag' do
|
1157
|
+
plugin.filter(event)
|
1158
|
+
|
1159
|
+
expect(event.get('tags')).to be_a_kind_of(Enumerable)
|
1160
|
+
expect(event.get('tags')).to include('BADKV')
|
1161
|
+
end
|
1162
|
+
end
|
1163
|
+
|
1164
|
+
context 'when default_keys are provided' do
|
1165
|
+
let(:options) { super().merge("default_keys" => {"default" => "key"})}
|
1166
|
+
|
1167
|
+
it 'does not populate default keys' do
|
1168
|
+
plugin.filter(event)
|
1169
|
+
|
1170
|
+
expect(event).to_not include('default')
|
1171
|
+
end
|
1172
|
+
end
|
1173
|
+
context 'when filter_matched hooks are provided' do
|
1174
|
+
let(:options) { super().merge("add_field" => {"kv" => "success"})}
|
1175
|
+
|
1176
|
+
it 'does not call filter_matched hooks' do
|
1177
|
+
plugin.filter(event)
|
1178
|
+
|
1179
|
+
expect(event).to_not include('kv')
|
1180
|
+
end
|
1181
|
+
end
|
1182
|
+
end
|
1183
|
+
|
1184
|
+
context 'when given a non-pathological input' do
|
1185
|
+
let(:message) { "foo==:bar baz==:bingo" }
|
1186
|
+
|
1187
|
+
it 'extracts the k/v' do
|
1188
|
+
plugin.filter(event)
|
1189
|
+
|
1190
|
+
expect(event.get('foo')).to eq('bar')
|
1191
|
+
expect(event.get('baz')).to eq('bingo')
|
1192
|
+
end
|
1193
|
+
end
|
1194
|
+
end
|
1195
|
+
|
1196
|
+
context 'when timeouts are explicitly disabled' do
|
1197
|
+
let(:options) { super().merge("timeout_millis" => 0) }
|
1198
|
+
|
1199
|
+
context 'when given a pathological input' do
|
1200
|
+
let(:message) { "foo========:bar baz================================================================bingo"}
|
1201
|
+
|
1202
|
+
it 'blocks for at least 3 seconds' do
|
1203
|
+
blocking_exception_class = Class.new(::Exception) # avoid RuntimeError, which is handled in KV#filter
|
1204
|
+
expect do
|
1205
|
+
Timeout.timeout(3, blocking_exception_class) do
|
1206
|
+
plugin.filter(event)
|
1207
|
+
end
|
1208
|
+
end.to raise_exception(blocking_exception_class)
|
1209
|
+
end
|
1210
|
+
end
|
1211
|
+
|
1212
|
+
context 'when given a non-pathological input' do
|
1213
|
+
let(:message) { "foo==:bar baz==:bingo" }
|
1214
|
+
|
1215
|
+
it 'extracts the k/v' do
|
1216
|
+
plugin.filter(event)
|
1217
|
+
|
1218
|
+
expect(event.get('foo')).to eq('bar')
|
1219
|
+
expect(event.get('baz')).to eq('bingo')
|
1220
|
+
end
|
1221
|
+
end
|
1096
1222
|
end
|
1097
1223
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-kv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.2.1
|
4
|
+
version: 4.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-02-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|