logstash-filter-kv 4.1.2 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e108f33865f7afc5f0b397d3ba6ae37b93cf664c6e8f6a8d9bf74650c987954c
4
- data.tar.gz: aded17fb29c331fe4d68ae280d2ed884e968ae180e6539a313c66b8399952be4
3
+ metadata.gz: 60449ebe2807038eecb8421ac199171dbdb021c5ed8207d5fa37ffa5904c27eb
4
+ data.tar.gz: 9f9b20456f61eae245a69cdcad9adc86f917711d14cd96cc1e8f8d3317caf038
5
5
  SHA512:
6
- metadata.gz: e122414de884300d596a3d7da38f6408e1a1a42d32894013dd8c8fea533bcf18ac90619cc0ca544c68c552ab0a8e1c8891d1d15d2ee60aeb4c028b7f396443be
7
- data.tar.gz: d667872a513d33403b7b26e69a876787ae985c515dac645948f3536f67d7b04175ec727a7f76b97d804b3934f0ee8699cd7d849a1b93f692b3c067647331ed39
6
+ metadata.gz: ee5f0e5a66cf9a88d70d757e7b729636a4f600d9db8022e0eeabd1220ff80065e62221dca5941efe38fdf9cb059eef026fba663a1c045ff24008c98bf985dfd2
7
+ data.tar.gz: 5894f8dfe7231d2a77e6a262f0422be1768613147d8a27cb565a08474dab95ec9026405f4fdbd76683ac7011271464361f98e7f3e65b65e358b81a97e9c4ba7d
@@ -1,3 +1,7 @@
1
+ ## 4.2.0
2
+ - Added `whitespace => strict` mode, which allows the parser to behave more predictably when input is known to avoid unnecessary whitespace.
3
+ - Added error handling, which tags the event with `_kv_filter_error` if an exception is raised while handling an event instead of allowing the plugin to crash.
4
+
1
5
  ## 4.1.2
2
6
  - bugfix: improves trim_key and trim_value to trim any _sequence_ of matching characters from the beginning and end of the corresponding keys and values; a previous implementation limited trim to a single character from each end, which was surprising.
3
7
  - bugfix: fixes issue where we can fail to correctly break up a sequence that includes a partially-quoted value followed by another fully-quoted value by slightly reducing greediness of quoted-value captures.
@@ -71,6 +71,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
71
71
  | <<plugins-{type}s-{plugin}-trim_value>> |<<string,string>>|No
72
72
  | <<plugins-{type}s-{plugin}-value_split>> |<<string,string>>|No
73
73
  | <<plugins-{type}s-{plugin}-value_split_pattern>> |<<string,string>>|No
74
+ | <<plugins-{type}s-{plugin}-whitespace>> |<<string,string>>, one of `["strict", "lenient"]`|No
74
75
  |=======================================================================
75
76
 
76
77
  Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
@@ -444,6 +445,23 @@ cautious with lookaheads or lookbehinds and positional anchors.
444
445
 
445
446
  See `field_split_pattern` for examples.
446
447
 
448
+ [id="plugins-{type}s-{plugin}-whitespace"]
449
+ ===== `whitespace`
450
+
451
+ * Value can be any of: `lenient`, `strict`
452
+ * Default value is `lenient`
453
+
454
+ An option specifying whether to be _lenient_ or _strict_ with the acceptance of unnecessary
455
+ whitespace surrounding the configured value-split sequence.
456
+
457
+ By default the plugin is run in `lenient` mode, which ignores spaces that occur before or
458
+ after the value-splitter. While this allows the plugin to make reasonable guesses with most
459
+ input, in some situations it may be too lenient.
460
+
461
+ You may want to enable `whitespace => strict` mode if you have control of the input data and
462
+ can guarantee that no extra spaces are added surrounding the pattern you have defined for
463
+ splitting values. Doing so will ensure that a _field-splitter_ sequence immediately following
464
+ a _value-splitter_ will be interpreted as an empty field.
447
465
 
448
466
  [id="plugins-{type}s-{plugin}-common-options"]
449
467
  include::{include_path}/{type}.asciidoc[]
@@ -303,6 +303,20 @@ class LogStash::Filters::KV < LogStash::Filters::Base
303
303
  #
304
304
  config :recursive, :validate => :boolean, :default => false
305
305
 
306
+ # An option specifying whether to be _lenient_ or _strict_ with the acceptance of unnecessary
307
+ # whitespace surrounding the configured value-split sequence.
308
+ #
309
+ # By default the plugin is run in `lenient` mode, which ignores spaces that occur before or
310
+ # after the value-splitter. While this allows the plugin to make reasonable guesses with most
311
+ # input, in some situations it may be too lenient.
312
+ #
313
+ # You may want to enable `whitespace => strict` mode if you have control of the input data and
314
+ # can guarantee that no extra spaces are added surrounding the pattern you have defined for
315
+ # splitting values. Doing so will ensure that a _field-splitter_ sequence immediately following
316
+ # a _value-splitter_ will be interpreted as an empty field.
317
+ #
318
+ config :whitespace, :validate => %w(strict lenient), :default => "lenient"
319
+
306
320
  def register
307
321
  if @value_split.empty?
308
322
  raise LogStash::ConfigurationError, I18n.t(
@@ -337,12 +351,21 @@ class LogStash::Filters::KV < LogStash::Filters::Base
337
351
  @remove_char_value_re = Regexp.new("[#{@remove_char_value}]") if @remove_char_value
338
352
  @remove_char_key_re = Regexp.new("[#{@remove_char_key}]") if @remove_char_key
339
353
 
340
- field_split = Regexp::compile(@field_split_pattern || /[#{@field_split}]/)
341
- value_split = Regexp::compile(@value_split_pattern || /[#{@value_split}]/)
342
-
343
- optional_whitespace = /\s*/
354
+ optional_whitespace = / */
344
355
  eof = /$/
345
356
 
357
+ field_split_pattern = Regexp::compile(@field_split_pattern || "[#{@field_split}]")
358
+ value_split_pattern = Regexp::compile(@value_split_pattern || "[#{@value_split}]")
359
+
360
+ # in legacy-compatible lenient mode, the value splitter can be wrapped in optional whitespace
361
+ if @whitespace == 'lenient'
362
+ value_split_pattern = /#{optional_whitespace}#{value_split_pattern}#{optional_whitespace}/
363
+ end
364
+
365
+ # a key is a _captured_ sequence of characters or escaped spaces before optional whitespace
366
+ # and followed by either a `value_split`, a `field_split`, or EOF.
367
+ key_pattern = unquoted_capture(value_split_pattern, field_split_pattern, eof)
368
+
346
369
  value_pattern = begin
347
370
  # each component expression within value_pattern _must_ capture exactly once.
348
371
  value_patterns = []
@@ -356,18 +379,13 @@ class LogStash::Filters::KV < LogStash::Filters::Base
356
379
  end
357
380
 
358
381
  # an unquoted value is a _captured_ sequence of characters or escaped spaces before a `field_split` or EOF.
359
- value_patterns << /((?:\\ |.)*?)(?=#{Regexp::union(field_split, eof)})/
382
+ value_patterns << unquoted_capture(field_split_pattern, eof)
360
383
 
361
- Regexp.union(*value_patterns)
384
+ Regexp.union(value_patterns)
362
385
  end
363
386
 
364
-
365
- # a key is a _captured_ sequence of characters or escaped spaces before optional whitespace
366
- # and followed by either a `value_split`, a `field_split`, or EOF.
367
- key_pattern = /((?:\\ |.)+?)(?=#{optional_whitespace}#{Regexp::union(value_split, field_split, eof)})/
368
-
369
- @scan_re = /#{field_split}?#{key_pattern}#{optional_whitespace}(?:#{value_split}#{optional_whitespace}#{value_pattern})?(?=#{Regexp::union(field_split, eof)})/
370
- @value_split_re = value_split
387
+ @scan_re = /#{key_pattern}#{value_split_pattern}#{value_pattern}#{Regexp::union(field_split_pattern, eof)}/
388
+ @value_split_re = value_split_pattern
371
389
 
372
390
  @logger.debug? && @logger.debug("KV scan regex", :regex => @scan_re.inspect)
373
391
  end
@@ -400,6 +418,11 @@ class LogStash::Filters::KV < LogStash::Filters::Base
400
418
  end
401
419
 
402
420
  filter_matched(event)
421
+ rescue => ex
422
+ meta = { :exception => ex.message }
423
+ meta[:backtrace] = ex.backtrace if logger.debug?
424
+ logger.warn('Exception while parsing KV', meta)
425
+ event.tag('_kv_filter_error')
403
426
  end
404
427
 
405
428
  private
@@ -423,9 +446,19 @@ class LogStash::Filters::KV < LogStash::Filters::Base
423
446
  close_pattern = /#{Regexp.quote(close_quote_sequence)}/
424
447
 
425
448
  # matches a sequence of zero or more characters that are _not_ the `close_quote_sequence`
426
- quoted_value_pattern = /[^#{Regexp.quote(close_quote_sequence)}]*/
449
+ quoted_value_pattern = unquoted_capture(close_pattern)
427
450
 
428
- /#{open_pattern}(#{quoted_value_pattern})#{close_pattern}/
451
+ /#{open_pattern}#{quoted_value_pattern}?#{close_pattern}/
452
+ end
453
+
454
+ # Helper function for generating a *capturing* `Regexp` that will match any non-empty sequence of characters that are either
455
+ # backslash-escaped OR *NOT* matching any of the given pattern(s)
456
+ #
457
+ # @api private
458
+ # @param *until_lookahead_patterns [Regexp]
459
+ # @return [Regexp]
460
+ def unquoted_capture(*until_lookahead_patterns)
461
+ /((?:\\.|(?!#{Regexp::union(until_lookahead_patterns)}).)+)/
429
462
  end
430
463
 
431
464
  def transform(text, method)
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-kv'
4
- s.version = '4.1.2'
4
+ s.version = '4.2.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Parses key-value pairs"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -46,6 +46,48 @@ describe LogStash::Filters::KV do
46
46
  end
47
47
  end
48
48
 
49
+ describe 'whitespace => strict' do
50
+ config <<-CONFIG
51
+ filter {
52
+ kv {
53
+ whitespace => strict
54
+ }
55
+ }
56
+ CONFIG
57
+
58
+ context 'unquoted values' do
59
+ sample "IN=eth0 OUT= MAC=0f:5f:5e:aa:d3:a2:21:ff:09:00:0f:e1:c8:17 SRC=192.168.0.1" do
60
+ insist { subject.get('IN') } == 'eth0'
61
+ insist { subject.get('OUT') } == nil # when whitespace is strict, OUT is empty and thus uncaptured.
62
+ insist { subject.get('MAC') } == '0f:5f:5e:aa:d3:a2:21:ff:09:00:0f:e1:c8:17'
63
+ insist { subject.get('SRC') } == '192.168.0.1'
64
+ end
65
+ end
66
+
67
+ context 'mixed quotations' do
68
+ sample 'hello=world goodbye=cruel\\ world empty_quoted="" quoted="value1" empty_unquoted= unquoted=value2 empty_bracketed=[] bracketed=[value3] cake=delicious' do
69
+ insist { subject.get('hello') } == 'world'
70
+ insist { subject.get('goodbye') } == 'cruel\\ world'
71
+ insist { subject.get('empty_quoted') } == nil
72
+ insist { subject.get('quoted') } == 'value1'
73
+ insist { subject.get('empty_unquoted') } == nil
74
+ insist { subject.get('unquoted') } == 'value2'
75
+ insist { subject.get('empty_bracketed') } == nil
76
+ insist { subject.get('bracketed') } == 'value3'
77
+ insist { subject.get('cake') } == 'delicious'
78
+ end
79
+ end
80
+
81
+ context 'when given sloppy input, it extracts only the unambiguous bits' do
82
+ sample "hello = world foo =bar baz= fizz whitespace=none doublequoted = \"hello world\" singlequoted= 'hello world' brackets =(hello world) strict=true" do
83
+ insist { subject.get('whitespace') } == 'none'
84
+ insist { subject.get('strict') } == 'true'
85
+
86
+ insist { subject.to_hash.keys.sort } == %w(@timestamp @version message strict whitespace)
87
+ end
88
+ end
89
+ end
90
+
49
91
  describe "test transforming keys to lowercase and values to uppercase" do
50
92
  config <<-CONFIG
51
93
  filter {
@@ -1018,3 +1060,38 @@ describe "multi character splitting" do
1018
1060
  end
1019
1061
  end
1020
1062
  end
1063
+
1064
+ context 'runtime errors' do
1065
+
1066
+ let(:options) { {} }
1067
+ let(:plugin) do
1068
+ LogStash::Filters::KV.new(options).instance_exec { register; self }
1069
+ end
1070
+
1071
+ let(:data) { {"message" => message} }
1072
+ let(:event) { LogStash::Event.new(data) }
1073
+ let(:message) { "foo=bar hello=world" }
1074
+
1075
+
1076
+ before(:each) do
1077
+ expect(plugin).to receive(:parse) { fail('intentional') }
1078
+ end
1079
+
1080
+ context 'when a runtime error is raised' do
1081
+ it 'does not cascade the exception to crash the plugin' do
1082
+ plugin.filter(event)
1083
+ end
1084
+ it 'tags the event with "_kv_filter_error"' do
1085
+ plugin.filter(event)
1086
+ expect(event.get('tags')).to_not be_nil
1087
+ expect(event.get('tags')).to include('_kv_filter_error')
1088
+ end
1089
+ it 'logs an informative message' do
1090
+ logger_double = double('Logger').as_null_object
1091
+ expect(plugin).to receive(:logger).and_return(logger_double).at_least(:once)
1092
+ expect(logger_double).to receive(:warn).with('Exception while parsing KV', anything)
1093
+
1094
+ plugin.filter(event)
1095
+ end
1096
+ end
1097
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-kv
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.1.2
4
+ version: 4.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-05-16 00:00:00.000000000 Z
11
+ date: 2018-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement