logstash-filter-kv 4.2.1 → 4.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b34ac3de7b0a2195dd497f68fe24b52acf22469467cd4fdce5052fc3c893b703
4
- data.tar.gz: c8979c98ca325f26d09722000ada76d205f1a5499fcaf77e0ee5b03fd4f00088
3
+ metadata.gz: c8b4c710387a809508f0e21af10fbaa40ba7e5d5767ed7a104b56445c98eef97
4
+ data.tar.gz: 717b1f7c691c20ff4bcf0faa009dee6e15246915542872625c91d3499a51910a
5
5
  SHA512:
6
- metadata.gz: 91ba5899a87d934d7bd0b19a2fc18f0c6ea8dabd01bb093e558536afcf53a1568cbe72035b1c61e4cb071c7cdd4c2b9d8195fa7fe4d2d3f8bec91aae2ec99979
7
- data.tar.gz: feef4f88bd22e8e3486e583fdab40d1b3e5e70590f704436579790f464afcf76de183ba49a57502d915fab2cc1358f0faaf215c3bf5759deb882879aad9b7b09
6
+ metadata.gz: 732251129f69923ea68133b38d6aa5e346b67d124903f3c3e98a113f6b508aebe498fbe1329b4dc44589600d8423a442331b70cc4d8dfe6d51cbbd3c93bf58fd
7
+ data.tar.gz: 9f348a9d4318ab77337292b8fbfe17d717cce0bd2e192733f835cd943bc21a00ccb672ba26ec812470b77a8d620caa3a922b9056cf6e1f9b82389e7936d82bb7
@@ -1,3 +1,9 @@
1
+ ## 4.3.0
2
+ - Added a timeout enforcer which prevents inputs that are pathological against the generated parser from blocking
3
+ the pipeline. By default, timeout is a generous 30s, but can be configured or disabled entirely with the new
4
+ `timeout_millis` and `tag_on_timeout` directives ([#79](https://github.com/logstash-plugins/logstash-filter-kv/pull/79))
5
+ - Made error-handling configurable with `tag_on_failure` directive.
6
+
1
7
  ## 4.2.1
2
8
  - Fixes performance regression introduced in 4.1.0 ([#70](https://github.com/logstash-plugins/logstash-filter-kv/issues/70))
3
9
 
@@ -65,6 +65,9 @@ This plugin supports the following configuration options plus the <<plugins-{typ
65
65
  | <<plugins-{type}s-{plugin}-remove_char_value>> |<<string,string>>|No
66
66
  | <<plugins-{type}s-{plugin}-source>> |<<string,string>>|No
67
67
  | <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
68
+ | <<plugins-{type}s-{plugin}-tag_on_failure>> |<<string,string>>|No
69
+ | <<plugins-{type}s-{plugin}-tag_on_timeout>> |<<string,string>>|No
70
+ | <<plugins-{type}s-{plugin}-timeout_millis>> |<<number,number>>|No
68
71
  | <<plugins-{type}s-{plugin}-transform_key>> |<<string,string>>, one of `["lowercase", "uppercase", "capitalize"]`|No
69
72
  | <<plugins-{type}s-{plugin}-transform_value>> |<<string,string>>, one of `["lowercase", "uppercase", "capitalize"]`|No
70
73
  | <<plugins-{type}s-{plugin}-trim_key>> |<<string,string>>|No
@@ -335,6 +338,38 @@ For example, to place all keys into the event field kv:
335
338
  [source,ruby]
336
339
  filter { kv { target => "kv" } }
337
340
 
341
+ [id="plugins-{type}s-{plugin}-tag_on_failure"]
342
+ ===== `tag_on_failure`
343
+
344
+ * Value type is <<string,string>>
345
+ * The default value for this setting is `_kv_filter_error`.
346
+
347
+ When a kv operation causes a runtime exception to be thrown within the plugin,
348
+ the operation is safely aborted without crashing the plugin, and the event is
349
+ tagged with the provided value.
350
+
351
+ [id="plugins-{type}s-{plugin}-tag_on_timeout"]
352
+ ===== `tag_on_timeout`
353
+
354
+ * Value type is <<string,string>>
355
+ * The default value for this setting is `_kv_filter_timeout`.
356
+
357
+ When timeouts are enabled and a kv operation is aborted, the event is tagged
358
+ with the provided value (see: <<plugins-{type}s-{plugin}-timeout_millis>>).
359
+
360
+ [id="plugins-{type}s-{plugin}-timeout_millis"]
361
+ ===== `timeout_millis`
362
+
363
+ * Value type is <<number,number>>
364
+ * The default value for this setting is 30000 (30 seconds).
365
+ * Set to zero (`0`) to disable timeouts.
366
+
367
+ Timeouts provide a safeguard against inputs that are pathological against the
368
+ regular expressions that are used to extract key/value pairs. When parsing an
369
+ event exceeds this threshold the operation is aborted and the event is tagged
370
+ in order to prevent the operation from blocking the pipeline
371
+ (see: <<plugins-{type}s-{plugin}-tag_on_timeout>>).
372
+
338
373
  [id="plugins-{type}s-{plugin}-transform_key"]
339
374
  ===== `transform_key`
340
375
 
@@ -317,6 +317,19 @@ class LogStash::Filters::KV < LogStash::Filters::Base
317
317
  #
318
318
  config :whitespace, :validate => %w(strict lenient), :default => "lenient"
319
319
 
320
+ # Attempt to terminate regexps after this amount of time.
321
+ # This applies per source field value if event has multiple values in the source field.
322
+ # This will never time out early, but may take a little longer to time out.
323
+ # Actual timeout is approximate based on a 250ms quantization.
324
+ # Set to 0 to disable timeouts
325
+ config :timeout_millis, :validate => :number, :default => 30_000
326
+
327
+ # Tag to apply if a kv regexp times out.
328
+ config :tag_on_timeout, :validate => :string, :default => '_kv_filter_timeout'
329
+
330
+ # Tag to apply if the kv operation raises an error.
331
+ config :tag_on_failure, :validate => :string, :default => '_kv_filter_error'
332
+
320
333
  def register
321
334
  if @value_split.empty?
322
335
  raise LogStash::ConfigurationError, I18n.t(
@@ -392,21 +405,26 @@ class LogStash::Filters::KV < LogStash::Filters::Base
392
405
  @value_split_re = value_split_pattern
393
406
 
394
407
  @logger.debug? && @logger.debug("KV scan regex", :regex => @scan_re.inspect)
408
+
409
+ @timeout_enforcer = initialize_timeout_enforcer
410
+ @timeout_enforcer.start!
395
411
  end
396
412
 
397
413
  def filter(event)
398
414
  kv = Hash.new
399
415
  value = event.get(@source)
400
416
 
401
- case value
402
- when nil
403
- # Nothing to do
404
- when String
405
- kv = parse(value, event, kv)
406
- when Array
407
- value.each { |v| kv = parse(v, event, kv) }
408
- else
409
- @logger.warn("kv filter has no support for this type of data", :type => value.class, :value => value)
417
+ @timeout_enforcer.execute do
418
+ case value
419
+ when nil
420
+ # Nothing to do
421
+ when String
422
+ parse(value, event, kv)
423
+ when Array
424
+ value.each { |v| parse(v, event, kv) }
425
+ else
426
+ @logger.warn("kv filter has no support for this type of data", :type => value.class, :value => value)
427
+ end
410
428
  end
411
429
 
412
430
  # Add default key-values for missing keys
@@ -422,15 +440,47 @@ class LogStash::Filters::KV < LogStash::Filters::Base
422
440
  end
423
441
 
424
442
  filter_matched(event)
443
+
444
+ rescue TimeoutException => e
445
+ logger.warn("Timeout reached in KV filter with value #{summarize(value)}")
446
+ event.tag(@tag_on_timeout)
425
447
  rescue => ex
426
448
  meta = { :exception => ex.message }
427
449
  meta[:backtrace] = ex.backtrace if logger.debug?
428
450
  logger.warn('Exception while parsing KV', meta)
429
- event.tag('_kv_filter_error')
451
+ event.tag(@tag_on_failure)
452
+ end
453
+
454
+ def close
455
+ @timeout_enforcer.stop!
430
456
  end
431
457
 
432
458
  private
433
459
 
460
# Builds a short, log-safe description of a value for use in warning messages.
#
# Values shorter than 255 bytes are rendered verbatim between backticks; longer
# values are truncated to their first 255 characters and escaped with
# `String#dump`. Arrays are summarized entry by entry.
#
# @overload summarize(value)
#   @param value [Array] entries are stringified and summarized individually
#   @return [String] the `to_s` form of the list of per-entry summaries
# @overload summarize(value)
#   @param value [String] (any other object is converted with `to_s`)
#   @return [String]
def summarize(value)
  # The explicit `return` matters: without it the per-entry summary is computed
  # and then discarded, and the whole array is stringified below instead.
  return value.map { |entry| summarize(entry.to_s) }.to_s if value.kind_of?(Array)

  value = value.to_s

  # `value[0, 255]` takes exactly 255 characters, matching the message
  # (`value[0..255]` would take 256).
  value.bytesize < 255 ? "`#{value}`" : "entry too large; first 255 chars are `#{value[0, 255].dump}`"
end
477
+
478
+ def initialize_timeout_enforcer
479
+ return NULL_TIMEOUT_ENFORCER if @timeout_millis <= 0
480
+
481
+ TimeoutEnforcer.new(logger, @timeout_millis * 1_000_000)
482
+ end
483
+
434
484
  def has_value_splitter?(s)
435
485
  s =~ @value_split_re
436
486
  end
@@ -487,9 +537,16 @@ class LogStash::Filters::KV < LogStash::Filters::Base
487
537
  end
488
538
  end
489
539
 
540
+ # Parses the given `text`, using the `event` for context, into the provided `kv_keys` hash
541
+ #
542
+ # @param text [String]: the text to parse
543
+ # @param event [LogStash::Event]: the event from which to extract context (e.g., sprintf vs (in|ex)clude keys)
544
+ # @param kv_keys [Hash{String=>Object}]: the hash in which to inject found key/value pairs
545
+ #
546
+ # @return [void]
490
547
  def parse(text, event, kv_keys)
491
548
  # short circuit parsing if the text does not contain the @value_split
492
- return kv_keys unless has_value_splitter?(text)
549
+ return unless has_value_splitter?(text)
493
550
 
494
551
  # Interpret dynamic keys for @include_keys and @exclude_keys
495
552
  include_keys = @include_keys.map{|key| event.sprintf(key)}
@@ -520,7 +577,8 @@ class LogStash::Filters::KV < LogStash::Filters::Base
520
577
 
521
578
  # recursively get more kv pairs from the value
522
579
  if @recursive
523
- innerKv = parse(value, event, {})
580
+ innerKv = {}
581
+ parse(value, event, innerKv)
524
582
  value = innerKv unless innerKv.empty?
525
583
  end
526
584
 
@@ -534,7 +592,95 @@ class LogStash::Filters::KV < LogStash::Filters::Base
534
592
  kv_keys[key] = value
535
593
  end
536
594
  end
595
+ end
596
+
597
+ class TimeoutException < RuntimeError
598
+ end
599
+
600
+ class TimeoutEnforcer
601
+ def initialize(logger, timeout_nanos)
602
+ @logger = logger
603
+ @running = java.util.concurrent.atomic.AtomicBoolean.new(false)
604
+ @timeout_nanos = timeout_nanos
537
605
 
538
- return kv_keys
606
+ # Stores running matches with their start time, this is used to cancel long running matches
607
+ # Is a map of Thread => start_time
608
+ @threads_to_start_time = java.util.concurrent.ConcurrentHashMap.new
609
+ end
610
+
611
+ def execute(&block)
612
+ begin
613
+ thread = java.lang.Thread.currentThread()
614
+ @threads_to_start_time.put(thread, java.lang.System.nanoTime)
615
+
616
+ yield
617
+
618
+ rescue InterruptedRegexpError, java.lang.InterruptedException => e
619
+ raise TimeoutException.new
620
+ ensure
621
+ # If the block finished, but interrupt was called after, we'll want to
622
+ # clear the interrupted status anyway
623
+ @threads_to_start_time.remove(thread)
624
+ thread.interrupted
625
+ end
626
+ end
627
+
628
+ def start!
629
+ @running.set(true)
630
+ @logger.debug("Starting timeout enforcer (#{@timeout_nanos}ns)")
631
+ @timer_thread = Thread.new do
632
+ while @running.get()
633
+ begin
634
+ cancel_timed_out!
635
+ rescue Exception => e
636
+ @logger.error("Error while attempting to check/cancel excessively long kv patterns",
637
+ :message => e.message,
638
+ :class => e.class.name,
639
+ :backtrace => e.backtrace
640
+ )
641
+ end
642
+ sleep 0.25
643
+ end
644
+ end
645
+ end
646
+
647
+ def stop!
648
+ @running.set(false)
649
+ @logger.debug("Shutting down timeout enforcer")
650
+ # Check for the thread mostly for a fast start/shutdown scenario
651
+ @timer_thread.join if @timer_thread
652
+ end
653
+
654
+ private
655
+
656
+ def cancel_timed_out!
657
+ now = java.lang.System.nanoTime # save ourselves some nanotime calls
658
+ @threads_to_start_time.keySet.each do |thread|
659
+ # Use compute to lock this value
660
+ @threads_to_start_time.computeIfPresent(thread) do |thread, start_time|
661
+ if start_time < now && now - start_time > @timeout_nanos
662
+ thread.interrupt
663
+ nil # Delete the key
664
+ else
665
+ start_time # preserve the key
666
+ end
667
+ end
668
+ end
669
+ end
670
+ end
671
+
672
+ class NullTimeoutEnforcer
673
+ def execute(&block)
674
+ yield
675
+ end
676
+
677
+ def start!
678
+ # no-op
679
+ end
680
+
681
+ def stop!
682
+ # no-op
683
+ end
539
684
  end
685
+ NULL_TIMEOUT_ENFORCER = NullTimeoutEnforcer.new
540
686
  end
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-kv'
4
- s.version = '4.2.1'
4
+ s.version = '4.3.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Parses key-value pairs"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -1093,5 +1093,131 @@ context 'runtime errors' do
1093
1093
 
1094
1094
  plugin.filter(event)
1095
1095
  end
1096
+ context 'when a custom tag is defined' do
1097
+ let(:options) { super().merge("tag_on_failure" => "KV-ERROR")}
1098
+ it 'tags the event with the custom tag' do
1099
+ plugin.filter(event)
1100
+ expect(event.get('tags')).to_not be_nil
1101
+ expect(event.get('tags')).to include('KV-ERROR')
1102
+ end
1103
+ end
1104
+ end
1105
+ end
1106
+
1107
+ # This group intentionally uses patterns that are vulnerable to pathological inputs to test timeouts.
1108
+ #
1109
+ # patterns of the form `/(?:x+x+)+y/` are vulnerable to inputs that have long sequences matching `/x/`
1110
+ # that are _not_ followed by a sequence matching `/y/`.
1111
+ context 'timeouts' do
1112
+ let(:options) do
1113
+ {
1114
+ "value_split_pattern" => "(?:=+=+)+:"
1115
+ }
1116
+ end
1117
+ subject(:plugin) do
1118
+ LogStash::Filters::KV.new(options).instance_exec { register; self }
1119
+ end
1120
+
1121
+ let(:data) { {"message" => message} }
1122
+ let(:event) { LogStash::Event.new(data) }
1123
+ let(:message) { "foo=bar hello=world" }
1124
+
1125
+ after(:each) { plugin.close }
1126
+
1127
+ # since we are dealing with potentially-pathological specs, ensure specs fail in a timely
1128
+ # manner if they block for longer than `spec_blocking_threshold_seconds`.
1129
+ let(:spec_blocking_threshold_seconds) { 10 }
1130
+ around(:each) do |example|
1131
+ begin
1132
+ blocking_exception_class = Class.new(::Exception) # avoid RuntimeError, which is handled in KV#filter
1133
+ Timeout.timeout(spec_blocking_threshold_seconds, blocking_exception_class, &example)
1134
+ rescue blocking_exception_class
1135
+ fail('execution blocked')
1136
+ end
1137
+ end
1138
+
1139
+ context 'when timeouts are enabled' do
1140
+ let(:options) { super().merge("timeout_millis" => 250) }
1141
+ let(:spec_blocking_threshold_seconds) { 3 }
1142
+
1143
+ context 'when given a pathological input' do
1144
+ let(:message) { "foo========:bar baz================================================bingo" }
1145
+
1146
+ it 'tags the event' do
1147
+ plugin.filter(event)
1148
+
1149
+ expect(event.get('tags')).to be_a_kind_of(Enumerable)
1150
+ expect(event.get('tags')).to include('_kv_filter_timeout')
1151
+ end
1152
+
1153
+ context 'when given a custom `tag_on_timeout`' do
1154
+ let(:options) { super().merge('tag_on_timeout' => 'BADKV') }
1155
+
1156
+ it 'tags the event with the custom tag' do
1157
+ plugin.filter(event)
1158
+
1159
+ expect(event.get('tags')).to be_a_kind_of(Enumerable)
1160
+ expect(event.get('tags')).to include('BADKV')
1161
+ end
1162
+ end
1163
+
1164
+ context 'when default_keys are provided' do
1165
+ let(:options) { super().merge("default_keys" => {"default" => "key"})}
1166
+
1167
+ it 'does not populate default keys' do
1168
+ plugin.filter(event)
1169
+
1170
+ expect(event).to_not include('default')
1171
+ end
1172
+ end
1173
+ context 'when filter_matched hooks are provided' do
1174
+ let(:options) { super().merge("add_field" => {"kv" => "success"})}
1175
+
1176
+ it 'does not call filter_matched hooks' do
1177
+ plugin.filter(event)
1178
+
1179
+ expect(event).to_not include('kv')
1180
+ end
1181
+ end
1182
+ end
1183
+
1184
+ context 'when given a non-pathological input' do
1185
+ let(:message) { "foo==:bar baz==:bingo" }
1186
+
1187
+ it 'extracts the k/v' do
1188
+ plugin.filter(event)
1189
+
1190
+ expect(event.get('foo')).to eq('bar')
1191
+ expect(event.get('baz')).to eq('bingo')
1192
+ end
1193
+ end
1194
+ end
1195
+
1196
+ context 'when timeouts are explicitly disabled' do
1197
+ let(:options) { super().merge("timeout_millis" => 0) }
1198
+
1199
+ context 'when given a pathological input' do
1200
+ let(:message) { "foo========:bar baz================================================================bingo"}
1201
+
1202
+ it 'blocks for at least 3 seconds' do
1203
+ blocking_exception_class = Class.new(::Exception) # avoid RuntimeError, which is handled in KV#filter
1204
+ expect do
1205
+ Timeout.timeout(3, blocking_exception_class) do
1206
+ plugin.filter(event)
1207
+ end
1208
+ end.to raise_exception(blocking_exception_class)
1209
+ end
1210
+ end
1211
+
1212
+ context 'when given a non-pathological input' do
1213
+ let(:message) { "foo==:bar baz==:bingo" }
1214
+
1215
+ it 'extracts the k/v' do
1216
+ plugin.filter(event)
1217
+
1218
+ expect(event.get('foo')).to eq('bar')
1219
+ expect(event.get('baz')).to eq('bingo')
1220
+ end
1221
+ end
1096
1222
  end
1097
1223
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-kv
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.2.1
4
+ version: 4.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-04 00:00:00.000000000 Z
11
+ date: 2019-02-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement