logstash-filter-kv 4.2.1 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b34ac3de7b0a2195dd497f68fe24b52acf22469467cd4fdce5052fc3c893b703
4
- data.tar.gz: c8979c98ca325f26d09722000ada76d205f1a5499fcaf77e0ee5b03fd4f00088
3
+ metadata.gz: c8b4c710387a809508f0e21af10fbaa40ba7e5d5767ed7a104b56445c98eef97
4
+ data.tar.gz: 717b1f7c691c20ff4bcf0faa009dee6e15246915542872625c91d3499a51910a
5
5
  SHA512:
6
- metadata.gz: 91ba5899a87d934d7bd0b19a2fc18f0c6ea8dabd01bb093e558536afcf53a1568cbe72035b1c61e4cb071c7cdd4c2b9d8195fa7fe4d2d3f8bec91aae2ec99979
7
- data.tar.gz: feef4f88bd22e8e3486e583fdab40d1b3e5e70590f704436579790f464afcf76de183ba49a57502d915fab2cc1358f0faaf215c3bf5759deb882879aad9b7b09
6
+ metadata.gz: 732251129f69923ea68133b38d6aa5e346b67d124903f3c3e98a113f6b508aebe498fbe1329b4dc44589600d8423a442331b70cc4d8dfe6d51cbbd3c93bf58fd
7
+ data.tar.gz: 9f348a9d4318ab77337292b8fbfe17d717cce0bd2e192733f835cd943bc21a00ccb672ba26ec812470b77a8d620caa3a922b9056cf6e1f9b82389e7936d82bb7
@@ -1,3 +1,9 @@
1
+ ## 4.3.0
2
+ - Added a timeout enforcer which prevents inputs that are pathological against the generated parser from blocking
3
+ the pipeline. By default, the timeout is a generous 30s, but it can be configured or disabled entirely with the new
4
+ `timeout_millis` and `tag_on_timeout` directives ([#79](https://github.com/logstash-plugins/logstash-filter-kv/pull/79))
5
+ - Made error-handling configurable with `tag_on_failure` directive.
6
+
1
7
  ## 4.2.1
2
8
  - Fixes performance regression introduced in 4.1.0 ([#70](https://github.com/logstash-plugins/logstash-filter-kv/issues/70))
3
9
 
@@ -65,6 +65,9 @@ This plugin supports the following configuration options plus the <<plugins-{typ
65
65
  | <<plugins-{type}s-{plugin}-remove_char_value>> |<<string,string>>|No
66
66
  | <<plugins-{type}s-{plugin}-source>> |<<string,string>>|No
67
67
  | <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
68
+ | <<plugins-{type}s-{plugin}-tag_on_failure>> |<<string,string>>|No
69
+ | <<plugins-{type}s-{plugin}-tag_on_timeout>> |<<string,string>>|No
70
+ | <<plugins-{type}s-{plugin}-timeout_millis>> |<<number,number>>|No
68
71
  | <<plugins-{type}s-{plugin}-transform_key>> |<<string,string>>, one of `["lowercase", "uppercase", "capitalize"]`|No
69
72
  | <<plugins-{type}s-{plugin}-transform_value>> |<<string,string>>, one of `["lowercase", "uppercase", "capitalize"]`|No
70
73
  | <<plugins-{type}s-{plugin}-trim_key>> |<<string,string>>|No
@@ -335,6 +338,38 @@ For example, to place all keys into the event field kv:
335
338
  [source,ruby]
336
339
  filter { kv { target => "kv" } }
337
340
 
341
+ [id="plugins-{type}s-{plugin}-tag_on_failure"]
342
+ ===== `tag_on_failure`
343
+
344
+ * Value type is <<string,string>>
345
+ * The default value for this setting is `_kv_filter_error`.
346
+
347
+ When a kv operation causes a runtime exception to be thrown within the plugin,
348
+ the operation is safely aborted without crashing the plugin, and the event is
349
+ tagged with the provided value.
350
+
351
+ [id="plugins-{type}s-{plugin}-tag_on_timeout"]
352
+ ===== `tag_on_timeout`
353
+
354
+ * Value type is <<string,string>>
355
+ * The default value for this setting is `_kv_filter_timeout`.
356
+
357
+ When timeouts are enabled and a kv operation is aborted, the event is tagged
358
+ with the provided value (see: <<plugins-{type}s-{plugin}-timeout_millis>>).
359
+
360
+ [id="plugins-{type}s-{plugin}-timeout_millis"]
361
+ ===== `timeout_millis`
362
+
363
+ * Value type is <<number,number>>
364
+ * The default value for this setting is 30000 (30 seconds).
365
+ * Set to zero (`0`) to disable timeouts.
366
+
367
+ Timeouts provide a safeguard against inputs that are pathological against the
368
+ regular expressions that are used to extract key/value pairs. When parsing an
369
+ event exceeds this threshold, the operation is aborted and the event is tagged
370
+ in order to prevent the operation from blocking the pipeline
371
+ (see: <<plugins-{type}s-{plugin}-tag_on_timeout>>).
372
+
338
373
  [id="plugins-{type}s-{plugin}-transform_key"]
339
374
  ===== `transform_key`
340
375
 
@@ -317,6 +317,19 @@ class LogStash::Filters::KV < LogStash::Filters::Base
317
317
  #
318
318
  config :whitespace, :validate => %w(strict lenient), :default => "lenient"
319
319
 
320
+ # Attempt to terminate regexps after this amount of time.
321
+ # This applies per source field value if event has multiple values in the source field.
322
+ # This will never time out early, but may take a little longer to time out.
323
+ # Actual timeout is approximate based on a 250ms quantization.
324
+ # Set to 0 to disable timeouts
325
+ config :timeout_millis, :validate => :number, :default => 30_000
326
+
327
+ # Tag to apply if a kv regexp times out.
328
+ config :tag_on_timeout, :validate => :string, :default => '_kv_filter_timeout'
329
+
330
+ # Tag to apply if the kv operation raises an error
331
+ config :tag_on_failure, :validate => :string, :default => '_kv_filter_error'
332
+
320
333
  def register
321
334
  if @value_split.empty?
322
335
  raise LogStash::ConfigurationError, I18n.t(
@@ -392,21 +405,26 @@ class LogStash::Filters::KV < LogStash::Filters::Base
392
405
  @value_split_re = value_split_pattern
393
406
 
394
407
  @logger.debug? && @logger.debug("KV scan regex", :regex => @scan_re.inspect)
408
+
409
+ @timeout_enforcer = initialize_timeout_enforcer
410
+ @timeout_enforcer.start!
395
411
  end
396
412
 
397
413
  def filter(event)
398
414
  kv = Hash.new
399
415
  value = event.get(@source)
400
416
 
401
- case value
402
- when nil
403
- # Nothing to do
404
- when String
405
- kv = parse(value, event, kv)
406
- when Array
407
- value.each { |v| kv = parse(v, event, kv) }
408
- else
409
- @logger.warn("kv filter has no support for this type of data", :type => value.class, :value => value)
417
+ @timeout_enforcer.execute do
418
+ case value
419
+ when nil
420
+ # Nothing to do
421
+ when String
422
+ parse(value, event, kv)
423
+ when Array
424
+ value.each { |v| parse(v, event, kv) }
425
+ else
426
+ @logger.warn("kv filter has no support for this type of data", :type => value.class, :value => value)
427
+ end
410
428
  end
411
429
 
412
430
  # Add default key-values for missing keys
@@ -422,15 +440,47 @@ class LogStash::Filters::KV < LogStash::Filters::Base
422
440
  end
423
441
 
424
442
  filter_matched(event)
443
+
444
+ rescue TimeoutException => e
445
+ logger.warn("Timeout reached in KV filter with value #{summarize(value)}")
446
+ event.tag(@tag_on_timeout)
425
447
  rescue => ex
426
448
  meta = { :exception => ex.message }
427
449
  meta[:backtrace] = ex.backtrace if logger.debug?
428
450
  logger.warn('Exception while parsing KV', meta)
429
- event.tag('_kv_filter_error')
451
+ event.tag(@tag_on_failure)
452
+ end
453
+
454
+ def close
455
+ @timeout_enforcer.stop!
430
456
  end
431
457
 
432
458
  private
433
459
 
460
+ # @overload summarize(value)
461
+ # @param value [Array]
462
+ # @return [String]
463
+ # @overload summarize(value)
464
+ # @param value [String]
465
+ # @return [String]
466
+ def summarize(value)
467
+ if value.kind_of?(Array)
468
+ value.map(&:to_s).map do |entry|
469
+ summarize(entry)
470
+ end.to_s
471
+ end
472
+
473
+ value = value.to_s
474
+
475
+ value.bytesize < 255 ? "`#{value}`" : "entry too large; first 255 chars are `#{value[0..255].dump}`"
476
+ end
477
+
478
+ def initialize_timeout_enforcer
479
+ return NULL_TIMEOUT_ENFORCER if @timeout_millis <= 0
480
+
481
+ TimeoutEnforcer.new(logger, @timeout_millis * 1_000_000)
482
+ end
483
+
434
484
  def has_value_splitter?(s)
435
485
  s =~ @value_split_re
436
486
  end
@@ -487,9 +537,16 @@ class LogStash::Filters::KV < LogStash::Filters::Base
487
537
  end
488
538
  end
489
539
 
540
+ # Parses the given `text`, using the `event` for context, into the provided `kv_keys` hash
541
+ #
542
+ # @param text [String]: the text to parse
543
+ # @param event [LogStash::Event]: the event from which to extract context (e.g., sprintf vs (in|ex)clude keys)
544
+ # @param kv_keys [Hash{String=>Object}]: the hash in which to inject found key/value pairs
545
+ #
546
+ # @return [void]
490
547
  def parse(text, event, kv_keys)
491
548
  # short circuit parsing if the text does not contain the @value_split
492
- return kv_keys unless has_value_splitter?(text)
549
+ return unless has_value_splitter?(text)
493
550
 
494
551
  # Interpret dynamic keys for @include_keys and @exclude_keys
495
552
  include_keys = @include_keys.map{|key| event.sprintf(key)}
@@ -520,7 +577,8 @@ class LogStash::Filters::KV < LogStash::Filters::Base
520
577
 
521
578
  # recursively get more kv pairs from the value
522
579
  if @recursive
523
- innerKv = parse(value, event, {})
580
+ innerKv = {}
581
+ parse(value, event, innerKv)
524
582
  value = innerKv unless innerKv.empty?
525
583
  end
526
584
 
@@ -534,7 +592,95 @@ class LogStash::Filters::KV < LogStash::Filters::Base
534
592
  kv_keys[key] = value
535
593
  end
536
594
  end
595
+ end
596
+
597
+ class TimeoutException < RuntimeError
598
+ end
599
+
600
+ class TimeoutEnforcer
601
+ def initialize(logger, timeout_nanos)
602
+ @logger = logger
603
+ @running = java.util.concurrent.atomic.AtomicBoolean.new(false)
604
+ @timeout_nanos = timeout_nanos
537
605
 
538
- return kv_keys
606
+ # Stores running matches with their start time, this is used to cancel long running matches
607
+ # Is a map of Thread => start_time
608
+ @threads_to_start_time = java.util.concurrent.ConcurrentHashMap.new
609
+ end
610
+
611
+ def execute(&block)
612
+ begin
613
+ thread = java.lang.Thread.currentThread()
614
+ @threads_to_start_time.put(thread, java.lang.System.nanoTime)
615
+
616
+ yield
617
+
618
+ rescue InterruptedRegexpError, java.lang.InterruptedException => e
619
+ raise TimeoutException.new
620
+ ensure
621
+ # If the block finished, but interrupt was called after, we'll want to
622
+ # clear the interrupted status anyway
623
+ @threads_to_start_time.remove(thread)
624
+ thread.interrupted
625
+ end
626
+ end
627
+
628
+ def start!
629
+ @running.set(true)
630
+ @logger.debug("Starting timeout enforcer (#{@timeout_nanos}ns)")
631
+ @timer_thread = Thread.new do
632
+ while @running.get()
633
+ begin
634
+ cancel_timed_out!
635
+ rescue Exception => e
636
+ @logger.error("Error while attempting to check/cancel excessively long kv patterns",
637
+ :message => e.message,
638
+ :class => e.class.name,
639
+ :backtrace => e.backtrace
640
+ )
641
+ end
642
+ sleep 0.25
643
+ end
644
+ end
645
+ end
646
+
647
+ def stop!
648
+ @running.set(false)
649
+ @logger.debug("Shutting down timeout enforcer")
650
+ # Check for the thread mostly for a fast start/shutdown scenario
651
+ @timer_thread.join if @timer_thread
652
+ end
653
+
654
+ private
655
+
656
+ def cancel_timed_out!
657
+ now = java.lang.System.nanoTime # save ourselves some nanotime calls
658
+ @threads_to_start_time.keySet.each do |thread|
659
+ # Use compute to lock this value
660
+ @threads_to_start_time.computeIfPresent(thread) do |thread, start_time|
661
+ if start_time < now && now - start_time > @timeout_nanos
662
+ thread.interrupt
663
+ nil # Delete the key
664
+ else
665
+ start_time # preserve the key
666
+ end
667
+ end
668
+ end
669
+ end
670
+ end
671
+
672
+ class NullTimeoutEnforcer
673
+ def execute(&block)
674
+ yield
675
+ end
676
+
677
+ def start!
678
+ # no-op
679
+ end
680
+
681
+ def stop!
682
+ # no-op
683
+ end
539
684
  end
685
+ NULL_TIMEOUT_ENFORCER = NullTimeoutEnforcer.new
540
686
  end
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-kv'
4
- s.version = '4.2.1'
4
+ s.version = '4.3.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Parses key-value pairs"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -1093,5 +1093,131 @@ context 'runtime errors' do
1093
1093
 
1094
1094
  plugin.filter(event)
1095
1095
  end
1096
+ context 'when a custom tag is defined' do
1097
+ let(:options) { super().merge("tag_on_failure" => "KV-ERROR")}
1098
+ it 'tags the event with the custom tag' do
1099
+ plugin.filter(event)
1100
+ expect(event.get('tags')).to_not be_nil
1101
+ expect(event.get('tags')).to include('KV-ERROR')
1102
+ end
1103
+ end
1104
+ end
1105
+ end
1106
+
1107
+ # This group intentionally uses patterns that are vulnerable to pathological inputs to test timeouts.
1108
+ #
1109
+ # patterns of the form `/(?:x+x+)+y/` are vulnerable to inputs that have long sequences matching `/x/`
1110
+ # that are _not_ followed by a sequence matching `/y/`.
1111
+ context 'timeouts' do
1112
+ let(:options) do
1113
+ {
1114
+ "value_split_pattern" => "(?:=+=+)+:"
1115
+ }
1116
+ end
1117
+ subject(:plugin) do
1118
+ LogStash::Filters::KV.new(options).instance_exec { register; self }
1119
+ end
1120
+
1121
+ let(:data) { {"message" => message} }
1122
+ let(:event) { LogStash::Event.new(data) }
1123
+ let(:message) { "foo=bar hello=world" }
1124
+
1125
+ after(:each) { plugin.close }
1126
+
1127
+ # since we are dealing with potentially-pathological specs, ensure specs fail in a timely
1128
+ # manner if they block for longer than `spec_blocking_threshold_seconds`.
1129
+ let(:spec_blocking_threshold_seconds) { 10 }
1130
+ around(:each) do |example|
1131
+ begin
1132
+ blocking_exception_class = Class.new(::Exception) # avoid RuntimeError, which is handled in KV#filter
1133
+ Timeout.timeout(spec_blocking_threshold_seconds, blocking_exception_class, &example)
1134
+ rescue blocking_exception_class
1135
+ fail('execution blocked')
1136
+ end
1137
+ end
1138
+
1139
+ context 'when timeouts are enabled' do
1140
+ let(:options) { super().merge("timeout_millis" => 250) }
1141
+ let(:spec_blocking_threshold_seconds) { 3 }
1142
+
1143
+ context 'when given a pathological input' do
1144
+ let(:message) { "foo========:bar baz================================================bingo" }
1145
+
1146
+ it 'tags the event' do
1147
+ plugin.filter(event)
1148
+
1149
+ expect(event.get('tags')).to be_a_kind_of(Enumerable)
1150
+ expect(event.get('tags')).to include('_kv_filter_timeout')
1151
+ end
1152
+
1153
+ context 'when given a custom `tag_on_timeout`' do
1154
+ let(:options) { super().merge('tag_on_timeout' => 'BADKV') }
1155
+
1156
+ it 'tags the event with the custom tag' do
1157
+ plugin.filter(event)
1158
+
1159
+ expect(event.get('tags')).to be_a_kind_of(Enumerable)
1160
+ expect(event.get('tags')).to include('BADKV')
1161
+ end
1162
+ end
1163
+
1164
+ context 'when default_keys are provided' do
1165
+ let(:options) { super().merge("default_keys" => {"default" => "key"})}
1166
+
1167
+ it 'does not populate default keys' do
1168
+ plugin.filter(event)
1169
+
1170
+ expect(event).to_not include('default')
1171
+ end
1172
+ end
1173
+ context 'when filter_matched hooks are provided' do
1174
+ let(:options) { super().merge("add_field" => {"kv" => "success"})}
1175
+
1176
+ it 'does not call filter_matched hooks' do
1177
+ plugin.filter(event)
1178
+
1179
+ expect(event).to_not include('kv')
1180
+ end
1181
+ end
1182
+ end
1183
+
1184
+ context 'when given a non-pathological input' do
1185
+ let(:message) { "foo==:bar baz==:bingo" }
1186
+
1187
+ it 'extracts the k/v' do
1188
+ plugin.filter(event)
1189
+
1190
+ expect(event.get('foo')).to eq('bar')
1191
+ expect(event.get('baz')).to eq('bingo')
1192
+ end
1193
+ end
1194
+ end
1195
+
1196
+ context 'when timeouts are explicitly disabled' do
1197
+ let(:options) { super().merge("timeout_millis" => 0) }
1198
+
1199
+ context 'when given a pathological input' do
1200
+ let(:message) { "foo========:bar baz================================================================bingo"}
1201
+
1202
+ it 'blocks for at least 3 seconds' do
1203
+ blocking_exception_class = Class.new(::Exception) # avoid RuntimeError, which is handled in KV#filter
1204
+ expect do
1205
+ Timeout.timeout(3, blocking_exception_class) do
1206
+ plugin.filter(event)
1207
+ end
1208
+ end.to raise_exception(blocking_exception_class)
1209
+ end
1210
+ end
1211
+
1212
+ context 'when given a non-pathological input' do
1213
+ let(:message) { "foo==:bar baz==:bingo" }
1214
+
1215
+ it 'extracts the k/v' do
1216
+ plugin.filter(event)
1217
+
1218
+ expect(event.get('foo')).to eq('bar')
1219
+ expect(event.get('baz')).to eq('bingo')
1220
+ end
1221
+ end
1096
1222
  end
1097
1223
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-kv
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.2.1
4
+ version: 4.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-04 00:00:00.000000000 Z
11
+ date: 2019-02-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement