logstash-filter-kv 4.1.2 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/docs/index.asciidoc +18 -0
- data/lib/logstash/filters/kv.rb +48 -15
- data/logstash-filter-kv.gemspec +1 -1
- data/spec/filters/kv_spec.rb +77 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 60449ebe2807038eecb8421ac199171dbdb021c5ed8207d5fa37ffa5904c27eb
|
4
|
+
data.tar.gz: 9f9b20456f61eae245a69cdcad9adc86f917711d14cd96cc1e8f8d3317caf038
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee5f0e5a66cf9a88d70d757e7b729636a4f600d9db8022e0eeabd1220ff80065e62221dca5941efe38fdf9cb059eef026fba663a1c045ff24008c98bf985dfd2
|
7
|
+
data.tar.gz: 5894f8dfe7231d2a77e6a262f0422be1768613147d8a27cb565a08474dab95ec9026405f4fdbd76683ac7011271464361f98e7f3e65b65e358b81a97e9c4ba7d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 4.2.0
|
2
|
+
- Added `whitespace => strict` mode, which allows the parser to behave more predictably when input is known to avoid unnecessary whitespace.
|
3
|
+
- Added error handling, which tags the event with `_kv_filter_error` if an exception is raised while handling an event instead of allowing the plugin to crash.
|
4
|
+
|
1
5
|
## 4.1.2
|
2
6
|
- bugfix: improves trim_key and trim_value to trim any _sequence_ of matching characters from the beginning and ends of the corresponding keys and values; a previous implementation limited trim to a single character from each end, which was surprising.
|
3
7
|
- bugfix: fixes issue where we can fail to correctly break up a sequence that includes a partially-quoted value followed by another fully-quoted value by slightly reducing greediness of quoted-value captures.
|
data/docs/index.asciidoc
CHANGED
@@ -71,6 +71,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
71
71
|
| <<plugins-{type}s-{plugin}-trim_value>> |<<string,string>>|No
|
72
72
|
| <<plugins-{type}s-{plugin}-value_split>> |<<string,string>>|No
|
73
73
|
| <<plugins-{type}s-{plugin}-value_split_pattern>> |<<string,string>>|No
|
74
|
+
| <<plugins-{type}s-{plugin}-whitespace>> |<<string,string>>, one of `["strict", "lenient"]`|No
|
74
75
|
|=======================================================================
|
75
76
|
|
76
77
|
Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
|
@@ -444,6 +445,23 @@ cautious with lookaheads or lookbehinds and positional anchors.
|
|
444
445
|
|
445
446
|
See `field_split_pattern` for examples.
|
446
447
|
|
448
|
+
[id="plugins-{type}s-{plugin}-whitespace"]
|
449
|
+
===== `whitespace`
|
450
|
+
|
451
|
+
* Value can be any of: `lenient`, `strict`
|
452
|
+
* Default value is `lenient`
|
453
|
+
|
454
|
+
An option specifying whether to be _lenient_ or _strict_ with the acceptance of unnecessary
|
455
|
+
whitespace surrounding the configured value-split sequence.
|
456
|
+
|
457
|
+
By default the plugin is run in `lenient` mode, which ignores spaces that occur before or
|
458
|
+
after the value-splitter. While this allows the plugin to make reasonable guesses with most
|
459
|
+
input, in some situations it may be too lenient.
|
460
|
+
|
461
|
+
You may want to enable `whitespace => strict` mode if you have control of the input data and
|
462
|
+
can guarantee that no extra spaces are added surrounding the pattern you have defined for
|
463
|
+
splitting values. Doing so will ensure that a _field-splitter_ sequence immediately following
|
464
|
+
a _value-splitter_ will be interpreted as an empty field.
|
447
465
|
|
448
466
|
[id="plugins-{type}s-{plugin}-common-options"]
|
449
467
|
include::{include_path}/{type}.asciidoc[]
|
data/lib/logstash/filters/kv.rb
CHANGED
@@ -303,6 +303,20 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
303
303
|
#
|
304
304
|
config :recursive, :validate => :boolean, :default => false
|
305
305
|
|
306
|
+
# An option specifying whether to be _lenient_ or _strict_ with the acceptance of unnecessary
|
307
|
+
# whitespace surrounding the configured value-split sequence.
|
308
|
+
#
|
309
|
+
# By default the plugin is run in `lenient` mode, which ignores spaces that occur before or
|
310
|
+
# after the value-splitter. While this allows the plugin to make reasonable guesses with most
|
311
|
+
# input, in some situations it may be too lenient.
|
312
|
+
#
|
313
|
+
# You may want to enable `whitespace => strict` mode if you have control of the input data and
|
314
|
+
# can guarantee that no extra spaces are added surrounding the pattern you have defined for
|
315
|
+
# splitting values. Doing so will ensure that a _field-splitter_ sequence immediately following
|
316
|
+
# a _value-splitter_ will be interpreted as an empty field.
|
317
|
+
#
|
318
|
+
config :whitespace, :validate => %w(strict lenient), :default => "lenient"
|
319
|
+
|
306
320
|
def register
|
307
321
|
if @value_split.empty?
|
308
322
|
raise LogStash::ConfigurationError, I18n.t(
|
@@ -337,12 +351,21 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
337
351
|
@remove_char_value_re = Regexp.new("[#{@remove_char_value}]") if @remove_char_value
|
338
352
|
@remove_char_key_re = Regexp.new("[#{@remove_char_key}]") if @remove_char_key
|
339
353
|
|
340
|
-
|
341
|
-
value_split = Regexp::compile(@value_split_pattern || /[#{@value_split}]/)
|
342
|
-
|
343
|
-
optional_whitespace = /\s*/
|
354
|
+
optional_whitespace = / */
|
344
355
|
eof = /$/
|
345
356
|
|
357
|
+
field_split_pattern = Regexp::compile(@field_split_pattern || "[#{@field_split}]")
|
358
|
+
value_split_pattern = Regexp::compile(@value_split_pattern || "[#{@value_split}]")
|
359
|
+
|
360
|
+
# in legacy-compatible lenient mode, the value splitter can be wrapped in optional whitespace
|
361
|
+
if @whitespace == 'lenient'
|
362
|
+
value_split_pattern = /#{optional_whitespace}#{value_split_pattern}#{optional_whitespace}/
|
363
|
+
end
|
364
|
+
|
365
|
+
# a key is a _captured_ sequence of characters or escaped spaces before optional whitespace
|
366
|
+
# and followed by either a `value_split`, a `field_split`, or EOF.
|
367
|
+
key_pattern = unquoted_capture(value_split_pattern, field_split_pattern, eof)
|
368
|
+
|
346
369
|
value_pattern = begin
|
347
370
|
# each component expression within value_pattern _must_ capture exactly once.
|
348
371
|
value_patterns = []
|
@@ -356,18 +379,13 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
356
379
|
end
|
357
380
|
|
358
381
|
# an unquoted value is a _captured_ sequence of characters or escaped spaces before a `field_split` or EOF.
|
359
|
-
value_patterns <<
|
382
|
+
value_patterns << unquoted_capture(field_split_pattern, eof)
|
360
383
|
|
361
|
-
Regexp.union(
|
384
|
+
Regexp.union(value_patterns)
|
362
385
|
end
|
363
386
|
|
364
|
-
|
365
|
-
|
366
|
-
# and followed by either a `value_split`, a `field_split`, or EOF.
|
367
|
-
key_pattern = /((?:\\ |.)+?)(?=#{optional_whitespace}#{Regexp::union(value_split, field_split, eof)})/
|
368
|
-
|
369
|
-
@scan_re = /#{field_split}?#{key_pattern}#{optional_whitespace}(?:#{value_split}#{optional_whitespace}#{value_pattern})?(?=#{Regexp::union(field_split, eof)})/
|
370
|
-
@value_split_re = value_split
|
387
|
+
@scan_re = /#{key_pattern}#{value_split_pattern}#{value_pattern}#{Regexp::union(field_split_pattern, eof)}/
|
388
|
+
@value_split_re = value_split_pattern
|
371
389
|
|
372
390
|
@logger.debug? && @logger.debug("KV scan regex", :regex => @scan_re.inspect)
|
373
391
|
end
|
@@ -400,6 +418,11 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
400
418
|
end
|
401
419
|
|
402
420
|
filter_matched(event)
|
421
|
+
rescue => ex
|
422
|
+
meta = { :exception => ex.message }
|
423
|
+
meta[:backtrace] = ex.backtrace if logger.debug?
|
424
|
+
logger.warn('Exception while parsing KV', meta)
|
425
|
+
event.tag('_kv_filter_error')
|
403
426
|
end
|
404
427
|
|
405
428
|
private
|
@@ -423,9 +446,19 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
423
446
|
close_pattern = /#{Regexp.quote(close_quote_sequence)}/
|
424
447
|
|
425
448
|
# matches a sequence of zero or more characters that are _not_ the `close_quote_sequence`
|
426
|
-
quoted_value_pattern =
|
449
|
+
quoted_value_pattern = unquoted_capture(close_pattern)
|
427
450
|
|
428
|
-
/#{open_pattern}
|
451
|
+
/#{open_pattern}#{quoted_value_pattern}?#{close_pattern}/
|
452
|
+
end
|
453
|
+
|
454
|
+
# Helper function for generating *capturing* `Regexp` that will match any sequence of characters that are either
|
455
|
+
# backslash-escaped OR *NOT* matching any of the given pattern(s)
|
456
|
+
#
|
457
|
+
# @api private
|
458
|
+
# @param *until_lookahead_patterns [Regexp]
|
459
|
+
# @return [Regexp]
|
460
|
+
def unquoted_capture(*until_lookahead_patterns)
|
461
|
+
/((?:\\.|(?!#{Regexp::union(until_lookahead_patterns)}).)+)/
|
429
462
|
end
|
430
463
|
|
431
464
|
def transform(text, method)
|
data/logstash-filter-kv.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-filter-kv'
|
4
|
-
s.version = '4.1.2'
|
4
|
+
s.version = '4.2.0'
|
5
5
|
s.licenses = ['Apache License (2.0)']
|
6
6
|
s.summary = "Parses key-value pairs"
|
7
7
|
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
|
data/spec/filters/kv_spec.rb
CHANGED
@@ -46,6 +46,48 @@ describe LogStash::Filters::KV do
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
+
describe 'whitespace => strict' do
|
50
|
+
config <<-CONFIG
|
51
|
+
filter {
|
52
|
+
kv {
|
53
|
+
whitespace => strict
|
54
|
+
}
|
55
|
+
}
|
56
|
+
CONFIG
|
57
|
+
|
58
|
+
context 'unquoted values' do
|
59
|
+
sample "IN=eth0 OUT= MAC=0f:5f:5e:aa:d3:a2:21:ff:09:00:0f:e1:c8:17 SRC=192.168.0.1" do
|
60
|
+
insist { subject.get('IN') } == 'eth0'
|
61
|
+
insist { subject.get('OUT') } == nil # when whitespace is strict, OUT is empty and thus uncaptured.
|
62
|
+
insist { subject.get('MAC') } == '0f:5f:5e:aa:d3:a2:21:ff:09:00:0f:e1:c8:17'
|
63
|
+
insist { subject.get('SRC') } == '192.168.0.1'
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
context 'mixed quotations' do
|
68
|
+
sample 'hello=world goodbye=cruel\\ world empty_quoted="" quoted="value1" empty_unquoted= unquoted=value2 empty_bracketed=[] bracketed=[value3] cake=delicious' do
|
69
|
+
insist { subject.get('hello') } == 'world'
|
70
|
+
insist { subject.get('goodbye') } == 'cruel\\ world'
|
71
|
+
insist { subject.get('empty_quoted') } == nil
|
72
|
+
insist { subject.get('quoted') } == 'value1'
|
73
|
+
insist { subject.get('empty_unquoted') } == nil
|
74
|
+
insist { subject.get('unquoted') } == 'value2'
|
75
|
+
insist { subject.get('empty_bracketed') } == nil
|
76
|
+
insist { subject.get('bracketed') } == 'value3'
|
77
|
+
insist { subject.get('cake') } == 'delicious'
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
context 'when given sloppy input, it extracts only the unambiguous bits' do
|
82
|
+
sample "hello = world foo =bar baz= fizz whitespace=none doublequoted = \"hello world\" singlequoted= 'hello world' brackets =(hello world) strict=true" do
|
83
|
+
insist { subject.get('whitespace') } == 'none'
|
84
|
+
insist { subject.get('strict') } == 'true'
|
85
|
+
|
86
|
+
insist { subject.to_hash.keys.sort } == %w(@timestamp @version message strict whitespace)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
49
91
|
describe "test transforming keys to lowercase and values to uppercase" do
|
50
92
|
config <<-CONFIG
|
51
93
|
filter {
|
@@ -1018,3 +1060,38 @@ describe "multi character splitting" do
|
|
1018
1060
|
end
|
1019
1061
|
end
|
1020
1062
|
end
|
1063
|
+
|
1064
|
+
context 'runtime errors' do
|
1065
|
+
|
1066
|
+
let(:options) { {} }
|
1067
|
+
let(:plugin) do
|
1068
|
+
LogStash::Filters::KV.new(options).instance_exec { register; self }
|
1069
|
+
end
|
1070
|
+
|
1071
|
+
let(:data) { {"message" => message} }
|
1072
|
+
let(:event) { LogStash::Event.new(data) }
|
1073
|
+
let(:message) { "foo=bar hello=world" }
|
1074
|
+
|
1075
|
+
|
1076
|
+
before(:each) do
|
1077
|
+
expect(plugin).to receive(:parse) { fail('intentional') }
|
1078
|
+
end
|
1079
|
+
|
1080
|
+
context 'when a runtime error is raised' do
|
1081
|
+
it 'does not cascade the exception to crash the plugin' do
|
1082
|
+
plugin.filter(event)
|
1083
|
+
end
|
1084
|
+
it 'tags the event with "_kv_filter_error"' do
|
1085
|
+
plugin.filter(event)
|
1086
|
+
expect(event.get('tags')).to_not be_nil
|
1087
|
+
expect(event.get('tags')).to include('_kv_filter_error')
|
1088
|
+
end
|
1089
|
+
it 'logs an informative message' do
|
1090
|
+
logger_double = double('Logger').as_null_object
|
1091
|
+
expect(plugin).to receive(:logger).and_return(logger_double).at_least(:once)
|
1092
|
+
expect(logger_double).to receive(:warn).with('Exception while parsing KV', anything)
|
1093
|
+
|
1094
|
+
plugin.filter(event)
|
1095
|
+
end
|
1096
|
+
end
|
1097
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-kv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.1.2
|
4
|
+
version: 4.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|