logstash-filter-kv 4.1.2 → 4.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e108f33865f7afc5f0b397d3ba6ae37b93cf664c6e8f6a8d9bf74650c987954c
4
- data.tar.gz: aded17fb29c331fe4d68ae280d2ed884e968ae180e6539a313c66b8399952be4
3
+ metadata.gz: 60449ebe2807038eecb8421ac199171dbdb021c5ed8207d5fa37ffa5904c27eb
4
+ data.tar.gz: 9f9b20456f61eae245a69cdcad9adc86f917711d14cd96cc1e8f8d3317caf038
5
5
  SHA512:
6
- metadata.gz: e122414de884300d596a3d7da38f6408e1a1a42d32894013dd8c8fea533bcf18ac90619cc0ca544c68c552ab0a8e1c8891d1d15d2ee60aeb4c028b7f396443be
7
- data.tar.gz: d667872a513d33403b7b26e69a876787ae985c515dac645948f3536f67d7b04175ec727a7f76b97d804b3934f0ee8699cd7d849a1b93f692b3c067647331ed39
6
+ metadata.gz: ee5f0e5a66cf9a88d70d757e7b729636a4f600d9db8022e0eeabd1220ff80065e62221dca5941efe38fdf9cb059eef026fba663a1c045ff24008c98bf985dfd2
7
+ data.tar.gz: 5894f8dfe7231d2a77e6a262f0422be1768613147d8a27cb565a08474dab95ec9026405f4fdbd76683ac7011271464361f98e7f3e65b65e358b81a97e9c4ba7d
@@ -1,3 +1,7 @@
1
+ ## 4.2.0
2
+ - Added `whitespace => strict` mode, which allows the parser to behave more predictably when input is known to avoid unnecessary whitespace.
3
+ - Added error handling, which tags the event with `_kv_filter_error` if an exception is raised while handling an event instead of allowing the plugin to crash.
4
+
1
5
  ## 4.1.2
2
6
  - bugfix: improves trim_key and trim_value to trim any _sequence_ of matching characters from the beginning and ends of the corresponding keys and values; a previous implementation limited trim to a single character from each end, which was surprising.
3
7
  - bugfix: fixes issue where we can fail to correctly break up a sequence that includes a partially-quoted value followed by another fully-quoted value by slightly reducing greediness of quoted-value captures.
@@ -71,6 +71,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
71
71
  | <<plugins-{type}s-{plugin}-trim_value>> |<<string,string>>|No
72
72
  | <<plugins-{type}s-{plugin}-value_split>> |<<string,string>>|No
73
73
  | <<plugins-{type}s-{plugin}-value_split_pattern>> |<<string,string>>|No
74
+ | <<plugins-{type}s-{plugin}-whitespace>> |<<string,string>>, one of `["strict", "lenient"]`|No
74
75
  |=======================================================================
75
76
 
76
77
  Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
@@ -444,6 +445,23 @@ cautious with lookaheads or lookbehinds and positional anchors.
444
445
 
445
446
  See `field_split_pattern` for examples.
446
447
 
448
+ [id="plugins-{type}s-{plugin}-whitespace"]
449
+ ===== `whitespace`
450
+
451
+ * Value can be any of: `lenient`, `strict`
452
+ * Default value is `lenient`
453
+
454
+ An option specifying whether to be _lenient_ or _strict_ with the acceptance of unnecessary
455
+ whitespace surrounding the configured value-split sequence.
456
+
457
+ By default the plugin is run in `lenient` mode, which ignores spaces that occur before or
458
+ after the value-splitter. While this allows the plugin to make reasonable guesses with most
459
+ input, in some situations it may be too lenient.
460
+
461
+ You may want to enable `whitespace => strict` mode if you have control of the input data and
462
+ can guarantee that no extra spaces are added surrounding the pattern you have defined for
463
+ splitting values. Doing so will ensure that a _field-splitter_ sequence immediately following
464
+ a _value-splitter_ will be interpreted as an empty field.
447
465
 
448
466
  [id="plugins-{type}s-{plugin}-common-options"]
449
467
  include::{include_path}/{type}.asciidoc[]
@@ -303,6 +303,20 @@ class LogStash::Filters::KV < LogStash::Filters::Base
303
303
  #
304
304
  config :recursive, :validate => :boolean, :default => false
305
305
 
306
+ # An option specifying whether to be _lenient_ or _strict_ with the acceptance of unnecessary
307
+ # whitespace surrounding the configured value-split sequence.
308
+ #
309
+ # By default the plugin is run in `lenient` mode, which ignores spaces that occur before or
310
+ # after the value-splitter. While this allows the plugin to make reasonable guesses with most
311
+ # input, in some situations it may be too lenient.
312
+ #
313
+ # You may want to enable `whitespace => strict` mode if you have control of the input data and
314
+ # can guarantee that no extra spaces are added surrounding the pattern you have defined for
315
+ # splitting values. Doing so will ensure that a _field-splitter_ sequence immediately following
316
+ # a _value-splitter_ will be interpreted as an empty field.
317
+ #
318
+ config :whitespace, :validate => %w(strict lenient), :default => "lenient"
319
+
306
320
  def register
307
321
  if @value_split.empty?
308
322
  raise LogStash::ConfigurationError, I18n.t(
@@ -337,12 +351,21 @@ class LogStash::Filters::KV < LogStash::Filters::Base
337
351
  @remove_char_value_re = Regexp.new("[#{@remove_char_value}]") if @remove_char_value
338
352
  @remove_char_key_re = Regexp.new("[#{@remove_char_key}]") if @remove_char_key
339
353
 
340
- field_split = Regexp::compile(@field_split_pattern || /[#{@field_split}]/)
341
- value_split = Regexp::compile(@value_split_pattern || /[#{@value_split}]/)
342
-
343
- optional_whitespace = /\s*/
354
+ optional_whitespace = / */
344
355
  eof = /$/
345
356
 
357
+ field_split_pattern = Regexp::compile(@field_split_pattern || "[#{@field_split}]")
358
+ value_split_pattern = Regexp::compile(@value_split_pattern || "[#{@value_split}]")
359
+
360
+ # in legacy-compatible lenient mode, the value splitter can be wrapped in optional whitespace
361
+ if @whitespace == 'lenient'
362
+ value_split_pattern = /#{optional_whitespace}#{value_split_pattern}#{optional_whitespace}/
363
+ end
364
+
365
+ # a key is a _captured_ sequence of characters or escaped spaces before optional whitespace
366
+ # and followed by either a `value_split`, a `field_split`, or EOF.
367
+ key_pattern = unquoted_capture(value_split_pattern, field_split_pattern, eof)
368
+
346
369
  value_pattern = begin
347
370
  # each component expression within value_pattern _must_ capture exactly once.
348
371
  value_patterns = []
@@ -356,18 +379,13 @@ class LogStash::Filters::KV < LogStash::Filters::Base
356
379
  end
357
380
 
358
381
  # an unquoted value is a _captured_ sequence of characters or escaped spaces before a `field_split` or EOF.
359
- value_patterns << /((?:\\ |.)*?)(?=#{Regexp::union(field_split, eof)})/
382
+ value_patterns << unquoted_capture(field_split_pattern, eof)
360
383
 
361
- Regexp.union(*value_patterns)
384
+ Regexp.union(value_patterns)
362
385
  end
363
386
 
364
-
365
- # a key is a _captured_ sequence of characters or escaped spaces before optional whitespace
366
- # and followed by either a `value_split`, a `field_split`, or EOF.
367
- key_pattern = /((?:\\ |.)+?)(?=#{optional_whitespace}#{Regexp::union(value_split, field_split, eof)})/
368
-
369
- @scan_re = /#{field_split}?#{key_pattern}#{optional_whitespace}(?:#{value_split}#{optional_whitespace}#{value_pattern})?(?=#{Regexp::union(field_split, eof)})/
370
- @value_split_re = value_split
387
+ @scan_re = /#{key_pattern}#{value_split_pattern}#{value_pattern}#{Regexp::union(field_split_pattern, eof)}/
388
+ @value_split_re = value_split_pattern
371
389
 
372
390
  @logger.debug? && @logger.debug("KV scan regex", :regex => @scan_re.inspect)
373
391
  end
@@ -400,6 +418,11 @@ class LogStash::Filters::KV < LogStash::Filters::Base
400
418
  end
401
419
 
402
420
  filter_matched(event)
421
+ rescue => ex
422
+ meta = { :exception => ex.message }
423
+ meta[:backtrace] = ex.backtrace if logger.debug?
424
+ logger.warn('Exception while parsing KV', meta)
425
+ event.tag('_kv_filter_error')
403
426
  end
404
427
 
405
428
  private
@@ -423,9 +446,19 @@ class LogStash::Filters::KV < LogStash::Filters::Base
423
446
  close_pattern = /#{Regexp.quote(close_quote_sequence)}/
424
447
 
425
448
  # matches a sequence of zero or more characters that are _not_ the `close_quote_sequence`
426
- quoted_value_pattern = /[^#{Regexp.quote(close_quote_sequence)}]*/
449
+ quoted_value_pattern = unquoted_capture(close_pattern)
427
450
 
428
- /#{open_pattern}(#{quoted_value_pattern})#{close_pattern}/
451
+ /#{open_pattern}#{quoted_value_pattern}?#{close_pattern}/
452
+ end
453
+
454
+ # Helper function for generating *capturing* `Regexp` that will match any sequence of characters that are either
455
+ # backslash-escaped OR *NOT* matching any of the given pattern(s)
456
+ #
457
+ # @api private
458
+ # @param *until_lookahead_patterns [Regexp]
459
+ # @return [Regexp]
460
+ def unquoted_capture(*until_lookahead_patterns)
461
+ /((?:\\.|(?!#{Regexp::union(until_lookahead_patterns)}).)+)/
429
462
  end
430
463
 
431
464
  def transform(text, method)
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-kv'
4
- s.version = '4.1.2'
4
+ s.version = '4.2.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Parses key-value pairs"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -46,6 +46,48 @@ describe LogStash::Filters::KV do
46
46
  end
47
47
  end
48
48
 
49
+ describe 'whitespace => strict' do
50
+ config <<-CONFIG
51
+ filter {
52
+ kv {
53
+ whitespace => strict
54
+ }
55
+ }
56
+ CONFIG
57
+
58
+ context 'unquoted values' do
59
+ sample "IN=eth0 OUT= MAC=0f:5f:5e:aa:d3:a2:21:ff:09:00:0f:e1:c8:17 SRC=192.168.0.1" do
60
+ insist { subject.get('IN') } == 'eth0'
61
+ insist { subject.get('OUT') } == nil # when whitespace is strict, OUT is empty and thus uncaptured.
62
+ insist { subject.get('MAC') } == '0f:5f:5e:aa:d3:a2:21:ff:09:00:0f:e1:c8:17'
63
+ insist { subject.get('SRC') } == '192.168.0.1'
64
+ end
65
+ end
66
+
67
+ context 'mixed quotations' do
68
+ sample 'hello=world goodbye=cruel\\ world empty_quoted="" quoted="value1" empty_unquoted= unquoted=value2 empty_bracketed=[] bracketed=[value3] cake=delicious' do
69
+ insist { subject.get('hello') } == 'world'
70
+ insist { subject.get('goodbye') } == 'cruel\\ world'
71
+ insist { subject.get('empty_quoted') } == nil
72
+ insist { subject.get('quoted') } == 'value1'
73
+ insist { subject.get('empty_unquoted') } == nil
74
+ insist { subject.get('unquoted') } == 'value2'
75
+ insist { subject.get('empty_bracketed') } == nil
76
+ insist { subject.get('bracketed') } == 'value3'
77
+ insist { subject.get('cake') } == 'delicious'
78
+ end
79
+ end
80
+
81
+ context 'when given sloppy input, it extracts only the unambiguous bits' do
82
+ sample "hello = world foo =bar baz= fizz whitespace=none doublequoted = \"hello world\" singlequoted= 'hello world' brackets =(hello world) strict=true" do
83
+ insist { subject.get('whitespace') } == 'none'
84
+ insist { subject.get('strict') } == 'true'
85
+
86
+ insist { subject.to_hash.keys.sort } == %w(@timestamp @version message strict whitespace)
87
+ end
88
+ end
89
+ end
90
+
49
91
  describe "test transforming keys to lowercase and values to uppercase" do
50
92
  config <<-CONFIG
51
93
  filter {
@@ -1018,3 +1060,38 @@ describe "multi character splitting" do
1018
1060
  end
1019
1061
  end
1020
1062
  end
1063
+
1064
+ context 'runtime errors' do
1065
+
1066
+ let(:options) { {} }
1067
+ let(:plugin) do
1068
+ LogStash::Filters::KV.new(options).instance_exec { register; self }
1069
+ end
1070
+
1071
+ let(:data) { {"message" => message} }
1072
+ let(:event) { LogStash::Event.new(data) }
1073
+ let(:message) { "foo=bar hello=world" }
1074
+
1075
+
1076
+ before(:each) do
1077
+ expect(plugin).to receive(:parse) { fail('intentional') }
1078
+ end
1079
+
1080
+ context 'when a runtime error is raised' do
1081
+ it 'does not cascade the exception to crash the plugin' do
1082
+ plugin.filter(event)
1083
+ end
1084
+ it 'tags the event with "_kv_filter_error"' do
1085
+ plugin.filter(event)
1086
+ expect(event.get('tags')).to_not be_nil
1087
+ expect(event.get('tags')).to include('_kv_filter_error')
1088
+ end
1089
+ it 'logs an informative message' do
1090
+ logger_double = double('Logger').as_null_object
1091
+ expect(plugin).to receive(:logger).and_return(logger_double).at_least(:once)
1092
+ expect(logger_double).to receive(:warn).with('Exception while parsing KV', anything)
1093
+
1094
+ plugin.filter(event)
1095
+ end
1096
+ end
1097
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-kv
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.1.2
4
+ version: 4.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-05-16 00:00:00.000000000 Z
11
+ date: 2018-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement