logstash-filter-kv 4.1.2 → 4.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/docs/index.asciidoc +18 -0
- data/lib/logstash/filters/kv.rb +48 -15
- data/logstash-filter-kv.gemspec +1 -1
- data/spec/filters/kv_spec.rb +77 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 60449ebe2807038eecb8421ac199171dbdb021c5ed8207d5fa37ffa5904c27eb
|
4
|
+
data.tar.gz: 9f9b20456f61eae245a69cdcad9adc86f917711d14cd96cc1e8f8d3317caf038
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee5f0e5a66cf9a88d70d757e7b729636a4f600d9db8022e0eeabd1220ff80065e62221dca5941efe38fdf9cb059eef026fba663a1c045ff24008c98bf985dfd2
|
7
|
+
data.tar.gz: 5894f8dfe7231d2a77e6a262f0422be1768613147d8a27cb565a08474dab95ec9026405f4fdbd76683ac7011271464361f98e7f3e65b65e358b81a97e9c4ba7d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 4.2.0
|
2
|
+
- Added `whitespace => strict` mode, which allows the parser to behave more predictably when input is known to avoid unnecessary whitespace.
|
3
|
+
- Added error handling, which tags the event with `_kv_filter_error` if an exception is raised while handling an event instead of allowing the plugin to crash.
|
4
|
+
|
1
5
|
## 4.1.2
|
2
6
|
- bugfix: improves trim_key and trim_value to trim any _sequence_ of matching characters from the beginning and end of the corresponding keys and values; a previous implementation limited trim to a single character from each end, which was surprising.
|
3
7
|
- bugfix: fixes issue where we can fail to correctly break up a sequence that includes a partially-quoted value followed by another fully-quoted value by slightly reducing greediness of quoted-value captures.
|
data/docs/index.asciidoc
CHANGED
@@ -71,6 +71,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
71
71
|
| <<plugins-{type}s-{plugin}-trim_value>> |<<string,string>>|No
|
72
72
|
| <<plugins-{type}s-{plugin}-value_split>> |<<string,string>>|No
|
73
73
|
| <<plugins-{type}s-{plugin}-value_split_pattern>> |<<string,string>>|No
|
74
|
+
| <<plugins-{type}s-{plugin}-whitespace>> |<<string,string>>, one of `["strict", "lenient"]`|No
|
74
75
|
|=======================================================================
|
75
76
|
|
76
77
|
Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
|
@@ -444,6 +445,23 @@ cautious with lookaheads or lookbehinds and positional anchors.
|
|
444
445
|
|
445
446
|
See `field_split_pattern` for examples.
|
446
447
|
|
448
|
+
[id="plugins-{type}s-{plugin}-whitespace"]
|
449
|
+
===== `whitespace`
|
450
|
+
|
451
|
+
* Value can be any of: `lenient`, `strict`
|
452
|
+
* Default value is `lenient`
|
453
|
+
|
454
|
+
An option specifying whether to be _lenient_ or _strict_ with the acceptance of unnecessary
|
455
|
+
whitespace surrounding the configured value-split sequence.
|
456
|
+
|
457
|
+
By default the plugin is run in `lenient` mode, which ignores spaces that occur before or
|
458
|
+
after the value-splitter. While this allows the plugin to make reasonable guesses with most
|
459
|
+
input, in some situations it may be too lenient.
|
460
|
+
|
461
|
+
You may want to enable `whitespace => strict` mode if you have control of the input data and
|
462
|
+
can guarantee that no extra spaces are added surrounding the pattern you have defined for
|
463
|
+
splitting values. Doing so will ensure that a _field-splitter_ sequence immediately following
|
464
|
+
a _value-splitter_ will be interpreted as an empty field.
|
447
465
|
|
448
466
|
[id="plugins-{type}s-{plugin}-common-options"]
|
449
467
|
include::{include_path}/{type}.asciidoc[]
|
data/lib/logstash/filters/kv.rb
CHANGED
@@ -303,6 +303,20 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
303
303
|
#
|
304
304
|
config :recursive, :validate => :boolean, :default => false
|
305
305
|
|
306
|
+
# An option specifying whether to be _lenient_ or _strict_ with the acceptance of unnecessary
|
307
|
+
# whitespace surrounding the configured value-split sequence.
|
308
|
+
#
|
309
|
+
# By default the plugin is run in `lenient` mode, which ignores spaces that occur before or
|
310
|
+
# after the value-splitter. While this allows the plugin to make reasonable guesses with most
|
311
|
+
# input, in some situations it may be too lenient.
|
312
|
+
#
|
313
|
+
# You may want to enable `whitespace => strict` mode if you have control of the input data and
|
314
|
+
# can guarantee that no extra spaces are added surrounding the pattern you have defined for
|
315
|
+
# splitting values. Doing so will ensure that a _field-splitter_ sequence immediately following
|
316
|
+
# a _value-splitter_ will be interpreted as an empty field.
|
317
|
+
#
|
318
|
+
config :whitespace, :validate => %w(strict lenient), :default => "lenient"
|
319
|
+
|
306
320
|
def register
|
307
321
|
if @value_split.empty?
|
308
322
|
raise LogStash::ConfigurationError, I18n.t(
|
@@ -337,12 +351,21 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
337
351
|
@remove_char_value_re = Regexp.new("[#{@remove_char_value}]") if @remove_char_value
|
338
352
|
@remove_char_key_re = Regexp.new("[#{@remove_char_key}]") if @remove_char_key
|
339
353
|
|
340
|
-
|
341
|
-
value_split = Regexp::compile(@value_split_pattern || /[#{@value_split}]/)
|
342
|
-
|
343
|
-
optional_whitespace = /\s*/
|
354
|
+
optional_whitespace = / */
|
344
355
|
eof = /$/
|
345
356
|
|
357
|
+
field_split_pattern = Regexp::compile(@field_split_pattern || "[#{@field_split}]")
|
358
|
+
value_split_pattern = Regexp::compile(@value_split_pattern || "[#{@value_split}]")
|
359
|
+
|
360
|
+
# in legacy-compatible lenient mode, the value splitter can be wrapped in optional whitespace
|
361
|
+
if @whitespace == 'lenient'
|
362
|
+
value_split_pattern = /#{optional_whitespace}#{value_split_pattern}#{optional_whitespace}/
|
363
|
+
end
|
364
|
+
|
365
|
+
# a key is a _captured_ sequence of characters or escaped spaces before optional whitespace
|
366
|
+
# and followed by either a `value_split`, a `field_split`, or EOF.
|
367
|
+
key_pattern = unquoted_capture(value_split_pattern, field_split_pattern, eof)
|
368
|
+
|
346
369
|
value_pattern = begin
|
347
370
|
# each component expression within value_pattern _must_ capture exactly once.
|
348
371
|
value_patterns = []
|
@@ -356,18 +379,13 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
356
379
|
end
|
357
380
|
|
358
381
|
# an unquoted value is a _captured_ sequence of characters or escaped spaces before a `field_split` or EOF.
|
359
|
-
value_patterns <<
|
382
|
+
value_patterns << unquoted_capture(field_split_pattern, eof)
|
360
383
|
|
361
|
-
Regexp.union(
|
384
|
+
Regexp.union(value_patterns)
|
362
385
|
end
|
363
386
|
|
364
|
-
|
365
|
-
|
366
|
-
# and followed by either a `value_split`, a `field_split`, or EOF.
|
367
|
-
key_pattern = /((?:\\ |.)+?)(?=#{optional_whitespace}#{Regexp::union(value_split, field_split, eof)})/
|
368
|
-
|
369
|
-
@scan_re = /#{field_split}?#{key_pattern}#{optional_whitespace}(?:#{value_split}#{optional_whitespace}#{value_pattern})?(?=#{Regexp::union(field_split, eof)})/
|
370
|
-
@value_split_re = value_split
|
387
|
+
@scan_re = /#{key_pattern}#{value_split_pattern}#{value_pattern}#{Regexp::union(field_split_pattern, eof)}/
|
388
|
+
@value_split_re = value_split_pattern
|
371
389
|
|
372
390
|
@logger.debug? && @logger.debug("KV scan regex", :regex => @scan_re.inspect)
|
373
391
|
end
|
@@ -400,6 +418,11 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
400
418
|
end
|
401
419
|
|
402
420
|
filter_matched(event)
|
421
|
+
rescue => ex
|
422
|
+
meta = { :exception => ex.message }
|
423
|
+
meta[:backtrace] = ex.backtrace if logger.debug?
|
424
|
+
logger.warn('Exception while parsing KV', meta)
|
425
|
+
event.tag('_kv_filter_error')
|
403
426
|
end
|
404
427
|
|
405
428
|
private
|
@@ -423,9 +446,19 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
423
446
|
close_pattern = /#{Regexp.quote(close_quote_sequence)}/
|
424
447
|
|
425
448
|
# matches a sequence of zero or more characters that are _not_ the `close_quote_sequence`
|
426
|
-
quoted_value_pattern =
|
449
|
+
quoted_value_pattern = unquoted_capture(close_pattern)
|
427
450
|
|
428
|
-
/#{open_pattern}
|
451
|
+
/#{open_pattern}#{quoted_value_pattern}?#{close_pattern}/
|
452
|
+
end
|
453
|
+
|
454
|
+
# Helper function for generating *capturing* `Regexp` that will match any sequence of characters that are either
|
455
|
+
# backslash-escaped OR *NOT* matching any of the given pattern(s)
|
456
|
+
#
|
457
|
+
# @api private
|
458
|
+
# @param *until_lookahead_patterns [Regexp]
|
459
|
+
# @return [Regexp]
|
460
|
+
def unquoted_capture(*until_lookahead_patterns)
|
461
|
+
/((?:\\.|(?!#{Regexp::union(until_lookahead_patterns)}).)+)/
|
429
462
|
end
|
430
463
|
|
431
464
|
def transform(text, method)
|
data/logstash-filter-kv.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-filter-kv'
|
4
|
-
s.version = '4.1.2'
|
4
|
+
s.version = '4.2.0'
|
5
5
|
s.licenses = ['Apache License (2.0)']
|
6
6
|
s.summary = "Parses key-value pairs"
|
7
7
|
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
|
data/spec/filters/kv_spec.rb
CHANGED
@@ -46,6 +46,48 @@ describe LogStash::Filters::KV do
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
+
describe 'whitespace => strict' do
|
50
|
+
config <<-CONFIG
|
51
|
+
filter {
|
52
|
+
kv {
|
53
|
+
whitespace => strict
|
54
|
+
}
|
55
|
+
}
|
56
|
+
CONFIG
|
57
|
+
|
58
|
+
context 'unquoted values' do
|
59
|
+
sample "IN=eth0 OUT= MAC=0f:5f:5e:aa:d3:a2:21:ff:09:00:0f:e1:c8:17 SRC=192.168.0.1" do
|
60
|
+
insist { subject.get('IN') } == 'eth0'
|
61
|
+
insist { subject.get('OUT') } == nil # when whitespace is strict, OUT is empty and thus uncaptured.
|
62
|
+
insist { subject.get('MAC') } == '0f:5f:5e:aa:d3:a2:21:ff:09:00:0f:e1:c8:17'
|
63
|
+
insist { subject.get('SRC') } == '192.168.0.1'
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
context 'mixed quotations' do
|
68
|
+
sample 'hello=world goodbye=cruel\\ world empty_quoted="" quoted="value1" empty_unquoted= unquoted=value2 empty_bracketed=[] bracketed=[value3] cake=delicious' do
|
69
|
+
insist { subject.get('hello') } == 'world'
|
70
|
+
insist { subject.get('goodbye') } == 'cruel\\ world'
|
71
|
+
insist { subject.get('empty_quoted') } == nil
|
72
|
+
insist { subject.get('quoted') } == 'value1'
|
73
|
+
insist { subject.get('empty_unquoted') } == nil
|
74
|
+
insist { subject.get('unquoted') } == 'value2'
|
75
|
+
insist { subject.get('empty_bracketed') } == nil
|
76
|
+
insist { subject.get('bracketed') } == 'value3'
|
77
|
+
insist { subject.get('cake') } == 'delicious'
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
context 'when given sloppy input, it extracts only the unambiguous bits' do
|
82
|
+
sample "hello = world foo =bar baz= fizz whitespace=none doublequoted = \"hello world\" singlequoted= 'hello world' brackets =(hello world) strict=true" do
|
83
|
+
insist { subject.get('whitespace') } == 'none'
|
84
|
+
insist { subject.get('strict') } == 'true'
|
85
|
+
|
86
|
+
insist { subject.to_hash.keys.sort } == %w(@timestamp @version message strict whitespace)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
49
91
|
describe "test transforming keys to lowercase and values to uppercase" do
|
50
92
|
config <<-CONFIG
|
51
93
|
filter {
|
@@ -1018,3 +1060,38 @@ describe "multi character splitting" do
|
|
1018
1060
|
end
|
1019
1061
|
end
|
1020
1062
|
end
|
1063
|
+
|
1064
|
+
context 'runtime errors' do
|
1065
|
+
|
1066
|
+
let(:options) { {} }
|
1067
|
+
let(:plugin) do
|
1068
|
+
LogStash::Filters::KV.new(options).instance_exec { register; self }
|
1069
|
+
end
|
1070
|
+
|
1071
|
+
let(:data) { {"message" => message} }
|
1072
|
+
let(:event) { LogStash::Event.new(data) }
|
1073
|
+
let(:message) { "foo=bar hello=world" }
|
1074
|
+
|
1075
|
+
|
1076
|
+
before(:each) do
|
1077
|
+
expect(plugin).to receive(:parse) { fail('intentional') }
|
1078
|
+
end
|
1079
|
+
|
1080
|
+
context 'when a runtime error is raised' do
|
1081
|
+
it 'does not cascade the exception to crash the plugin' do
|
1082
|
+
plugin.filter(event)
|
1083
|
+
end
|
1084
|
+
it 'tags the event with "_kv_filter_error"' do
|
1085
|
+
plugin.filter(event)
|
1086
|
+
expect(event.get('tags')).to_not be_nil
|
1087
|
+
expect(event.get('tags')).to include('_kv_filter_error')
|
1088
|
+
end
|
1089
|
+
it 'logs an informative message' do
|
1090
|
+
logger_double = double('Logger').as_null_object
|
1091
|
+
expect(plugin).to receive(:logger).and_return(logger_double).at_least(:once)
|
1092
|
+
expect(logger_double).to receive(:warn).with('Exception while parsing KV', anything)
|
1093
|
+
|
1094
|
+
plugin.filter(event)
|
1095
|
+
end
|
1096
|
+
end
|
1097
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-kv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.1.2
|
4
|
+
version: 4.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|