logstash-filter-kv 4.0.3 → 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/LICENSE +1 -1
- data/docs/index.asciidoc +40 -2
- data/lib/logstash/filters/kv.rb +105 -10
- data/logstash-filter-kv.gemspec +1 -1
- data/spec/filters/kv_spec.rb +126 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: db1a1524254aafe6af64400a781c90fe02a0e82bcc037d8ce43b652de30dea93
|
4
|
+
data.tar.gz: 36443b889dfde58914512466fe3eadb703fca7c30b665a3d02106f5c6b91cada
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 072403211a806a928e240df180ab2180257b22579db3dd9de9d3ec4065f0e9b75374db3ade99c7e09ad0fc5b66f09701026ee97f81ec3a76c98826b50bd33e94
|
7
|
+
data.tar.gz: 8c7dd7066d669c4f0fea15942f14fc2c80a9412190e31bb0b36cb7382843b3f8839b843b89c72ba930eb04e697e7e5536b01603c78c0666db2b81a49948a0d3d
|
data/CHANGELOG.md
CHANGED
data/LICENSE
CHANGED
data/docs/index.asciidoc
CHANGED
@@ -135,7 +135,7 @@ To exclude `from` and `to`, but retain the `foo` key, you could use this configu
|
|
135
135
|
* Value type is <<string,string>>
|
136
136
|
* Default value is `" "`
|
137
137
|
|
138
|
-
A string of characters to use as delimiters for parsing out key-value pairs.
|
138
|
+
A string of characters to use as single-character field delimiters for parsing out key-value pairs.
|
139
139
|
|
140
140
|
These characters form a regex character class and thus you must escape special regex
|
141
141
|
characters like `[` or `]` using `\`.
|
@@ -160,6 +160,29 @@ fields:
|
|
160
160
|
* `oq: bobo`
|
161
161
|
* `ss: 12345`
|
162
162
|
|
163
|
+
[id="plugins-{type}s-{plugin}-field_split_pattern"]
|
164
|
+
===== `field_split_pattern`
|
165
|
+
|
166
|
+
* Value type is <<string,string>>
|
167
|
+
* There is no default value for this setting.
|
168
|
+
|
169
|
+
A regular expression to use as the field delimiter for parsing out key-value pairs.
|
170
|
+
Useful to define multi-character field delimiters.
|
171
|
+
Setting the `field_split_pattern` option will take precedence over the `field_split` option.
|
172
|
+
|
173
|
+
Note that you should avoid using captured groups in your regex and you should be
|
174
|
+
cautious with lookaheads or lookbehinds and positional anchors.
|
175
|
+
|
176
|
+
For example, to split fields on a repetition of one or more colons
|
177
|
+
`k1=v1:k2=v2::k3=v3:::k4=v4`:
|
178
|
+
[source,ruby]
|
179
|
+
filter { kv { field_split_pattern => ":+" } }
|
180
|
+
|
181
|
+
To split fields on a regex character that needs escaping, like the plus sign
|
182
|
+
`k1=v1++k2=v2++k3=v3++k4=v4`:
|
183
|
+
[source,ruby]
|
184
|
+
filter { kv { field_split_pattern => "\\+\\+" } }
|
185
|
+
|
163
186
|
[id="plugins-{type}s-{plugin}-include_brackets"]
|
164
187
|
===== `include_brackets`
|
165
188
|
|
@@ -393,7 +416,7 @@ For example, to trim `<`, `>`, `[`, `]` and `,` characters from values:
|
|
393
416
|
* Value type is <<string,string>>
|
394
417
|
* Default value is `"="`
|
395
418
|
|
396
|
-
A non-empty string of characters to use as delimiters for
|
419
|
+
A non-empty string of characters to use as single-character value delimiters for parsing out key-value pairs.
|
397
420
|
|
398
421
|
These characters form a regex character class and thus you must escape special regex
|
399
422
|
characters like `[` or `]` using `\`.
|
@@ -404,6 +427,21 @@ For example, to identify key-values such as
|
|
404
427
|
filter { kv { value_split => ":" } }
|
405
428
|
|
406
429
|
|
430
|
+
[id="plugins-{type}s-{plugin}-value_split_pattern"]
|
431
|
+
===== `value_split_pattern`
|
432
|
+
|
433
|
+
* Value type is <<string,string>>
|
434
|
+
* There is no default value for this setting.
|
435
|
+
|
436
|
+
A regular expression to use as the value delimiter for parsing out key-value pairs.
|
437
|
+
Useful to define multi-character value delimiters.
|
438
|
+
Setting the `value_split_pattern` option will take precedence over the `value_split` option.
|
439
|
+
|
440
|
+
Note that you should avoid using captured groups in your regex and you should be
|
441
|
+
cautious with lookaheads or lookbehinds and positional anchors.
|
442
|
+
|
443
|
+
See `field_split_pattern` for examples.
|
444
|
+
|
407
445
|
|
408
446
|
[id="plugins-{type}s-{plugin}-common-options"]
|
409
447
|
include::{include_path}/{type}.asciidoc[]
|
data/lib/logstash/filters/kv.rb
CHANGED
@@ -123,7 +123,7 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
123
123
|
# }
|
124
124
|
config :transform_key, :validate => [TRANSFORM_LOWERCASE_KEY, TRANSFORM_UPPERCASE_KEY, TRANSFORM_CAPITALIZE_KEY]
|
125
125
|
|
126
|
-
# A string of characters to use as delimiters for parsing out key-value pairs.
|
126
|
+
# A string of characters to use as single-character field delimiters for parsing out key-value pairs.
|
127
127
|
#
|
128
128
|
# These characters form a regex character class and thus you must escape special regex
|
129
129
|
# characters like `[` or `]` using `\`.
|
@@ -149,8 +149,25 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
149
149
|
# * `ss: 12345`
|
150
150
|
config :field_split, :validate => :string, :default => ' '
|
151
151
|
|
152
|
+
# A regular expression to use as the field delimiter for parsing out key-value pairs.
|
153
|
+
# Useful to define multi-character field delimiters.
|
154
|
+
# Setting the field_split_pattern option will take precedence over the field_split option.
|
155
|
+
#
|
156
|
+
# Note that you should avoid using captured groups in your regex and you should be
|
157
|
+
# cautious with lookaheads or lookbehinds and positional anchors.
|
158
|
+
#
|
159
|
+
# For example, to split fields on a repetition of one or more colons
|
160
|
+
# `k1=v1:k2=v2::k3=v3:::k4=v4`:
|
161
|
+
# [source,ruby]
|
162
|
+
# filter { kv { field_split_pattern => ":+" } }
|
163
|
+
#
|
164
|
+
# To split fields on a regex character that needs escaping, like the plus sign
|
165
|
+
# `k1=v1++k2=v2++k3=v3++k4=v4`:
|
166
|
+
# [source,ruby]
|
167
|
+
# filter { kv { field_split_pattern => "\\+\\+" } }
|
168
|
+
config :field_split_pattern, :validate => :string
|
152
169
|
|
153
|
-
# A non-empty string of characters to use as delimiters for
|
170
|
+
# A non-empty string of characters to use as single-character value delimiters for parsing out key-value pairs.
|
154
171
|
#
|
155
172
|
# These characters form a regex character class and thus you must escape special regex
|
156
173
|
# characters like `[` or `]` using `\`.
|
@@ -161,6 +178,16 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
161
178
|
# filter { kv { value_split => ":" } }
|
162
179
|
config :value_split, :validate => :string, :default => '='
|
163
180
|
|
181
|
+
# A regular expression to use as the value delimiter for parsing out key-value pairs.
|
182
|
+
# Useful to define multi-character value delimiters.
|
183
|
+
# Setting the value_split_pattern option will take precedence over the value_split option.
|
184
|
+
#
|
185
|
+
# Note that you should avoid using captured groups in your regex and you should be
|
186
|
+
# cautious with lookaheads or lookbehinds and positional anchors.
|
187
|
+
#
|
188
|
+
# See field_split_pattern for examples.
|
189
|
+
config :value_split_pattern, :validate => :string
|
190
|
+
|
164
191
|
# A string to prepend to all of the extracted keys.
|
165
192
|
#
|
166
193
|
# For example, to prepend arg_ to all keys:
|
@@ -279,24 +306,70 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
279
306
|
def register
|
280
307
|
if @value_split.empty?
|
281
308
|
raise LogStash::ConfigurationError, I18n.t(
|
282
|
-
"logstash.
|
309
|
+
"logstash.runner.configuration.invalid_plugin_register",
|
283
310
|
:plugin => "filter",
|
284
311
|
:type => "kv",
|
285
312
|
:error => "Configuration option 'value_split' must be a non-empty string"
|
286
313
|
)
|
287
314
|
end
|
288
315
|
|
316
|
+
if @field_split_pattern && @field_split_pattern.empty?
|
317
|
+
raise LogStash::ConfigurationError, I18n.t(
|
318
|
+
"logstash.runner.configuration.invalid_plugin_register",
|
319
|
+
:plugin => "filter",
|
320
|
+
:type => "kv",
|
321
|
+
:error => "Configuration option 'field_split_pattern' must be a non-empty string"
|
322
|
+
)
|
323
|
+
end
|
324
|
+
|
325
|
+
if @value_split_pattern && @value_split_pattern.empty?
|
326
|
+
raise LogStash::ConfigurationError, I18n.t(
|
327
|
+
"logstash.runner.configuration.invalid_plugin_register",
|
328
|
+
:plugin => "filter",
|
329
|
+
:type => "kv",
|
330
|
+
:error => "Configuration option 'value_split_pattern' must be a non-empty string"
|
331
|
+
)
|
332
|
+
end
|
333
|
+
|
289
334
|
@trim_value_re = Regexp.new("^[#{@trim_value}]|[#{@trim_value}]$") if @trim_value
|
290
335
|
@trim_key_re = Regexp.new("^[#{@trim_key}]|[#{@trim_key}]$") if @trim_key
|
291
336
|
|
292
337
|
@remove_char_value_re = Regexp.new("[#{@remove_char_value}]") if @remove_char_value
|
293
338
|
@remove_char_key_re = Regexp.new("[#{@remove_char_key}]") if @remove_char_key
|
294
339
|
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
340
|
+
field_split = Regexp::compile(@field_split_pattern || /[#{@field_split}]/)
|
341
|
+
value_split = Regexp::compile(@value_split_pattern || /[#{@value_split}]/)
|
342
|
+
|
343
|
+
optional_whitespace = /\s*/
|
344
|
+
eof = /$/
|
345
|
+
|
346
|
+
value_pattern = begin
|
347
|
+
# each component expression within value_pattern _must_ capture exactly once.
|
348
|
+
value_patterns = []
|
349
|
+
|
350
|
+
value_patterns << quoted_capture(%q(")) # quoted double
|
351
|
+
value_patterns << quoted_capture(%q(')) # quoted single
|
352
|
+
if @include_brackets
|
353
|
+
value_patterns << quoted_capture('(', ')') # bracketed paren
|
354
|
+
value_patterns << quoted_capture('[', ']') # bracketed square
|
355
|
+
value_patterns << quoted_capture('<', '>') # bracketed angle
|
356
|
+
end
|
357
|
+
|
358
|
+
# an unquoted value is a _captured_ sequence of characters or escaped spaces before a `field_split` or EOF.
|
359
|
+
value_patterns << /((?:\\ |.)+?)(?=#{Regexp::union(field_split, eof)})/
|
360
|
+
|
361
|
+
Regexp.union(*value_patterns)
|
362
|
+
end
|
363
|
+
|
364
|
+
|
365
|
+
# a key is a _captured_ sequence of characters or escaped spaces before optional whitespace
|
366
|
+
# and followed by either a `value_split`, a `field_split`, or EOF.
|
367
|
+
key_pattern = /((?:\\ |.)+?)(?=#{optional_whitespace}#{Regexp::union(value_split, field_split, eof)})/
|
368
|
+
|
369
|
+
@scan_re = /#{field_split}?#{key_pattern}#{optional_whitespace}(?:#{value_split}#{optional_whitespace}#{value_pattern})?(?=#{Regexp::union(field_split, eof)})/
|
370
|
+
@value_split_re = value_split
|
371
|
+
|
372
|
+
@logger.debug? && @logger.debug("KV scan regex", :regex => @scan_re.inspect)
|
300
373
|
end
|
301
374
|
|
302
375
|
def filter(event)
|
@@ -335,6 +408,26 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
335
408
|
s =~ @value_split_re
|
336
409
|
end
|
337
410
|
|
411
|
+
# Helper function for generating single-capture `Regexp` that, when matching a string bound by the given quotes
|
412
|
+
# or brackets, will capture the content that is between the quotes or brackets.
|
413
|
+
#
|
414
|
+
# @api private
|
415
|
+
# @param quote_sequence [String] a character sequence that begins a quoted expression
|
416
|
+
# @param close_quote_sequence [String] a character sequence that ends a quoted expression; (default: quote_sequence)
|
417
|
+
# @return [Regexp] with a single capture group representing content that is between the given quotes
|
418
|
+
def quoted_capture(quote_sequence, close_quote_sequence=quote_sequence)
|
419
|
+
fail('quote_sequence must be non-empty!') if quote_sequence.nil? || quote_sequence.empty?
|
420
|
+
fail('close_quote_sequence must be non-empty!') if close_quote_sequence.nil? || close_quote_sequence.empty?
|
421
|
+
|
422
|
+
open_pattern = /#{Regexp.quote(quote_sequence)}/
|
423
|
+
close_pattern = /#{Regexp.quote(close_quote_sequence)}/
|
424
|
+
|
425
|
+
# matches a sequence of zero or more characters that is followed by the `close_quote_sequence`
|
426
|
+
quoted_value_pattern = /(?:.)*?(?=#{Regexp.quote(close_quote_sequence)})/
|
427
|
+
|
428
|
+
/#{open_pattern}(#{quoted_value_pattern})#{close_pattern}/
|
429
|
+
end
|
430
|
+
|
338
431
|
def transform(text, method)
|
339
432
|
case method
|
340
433
|
when TRANSFORM_LOWERCASE_KEY
|
@@ -354,8 +447,10 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
354
447
|
include_keys = @include_keys.map{|key| event.sprintf(key)}
|
355
448
|
exclude_keys = @exclude_keys.map{|key| event.sprintf(key)}
|
356
449
|
|
357
|
-
text.scan(@scan_re) do |key,
|
358
|
-
value =
|
450
|
+
text.scan(@scan_re) do |key, *value_candidates|
|
451
|
+
value = value_candidates.compact.first
|
452
|
+
next if value.nil? || value.empty?
|
453
|
+
|
359
454
|
key = @trim_key ? key.gsub(@trim_key_re, "") : key
|
360
455
|
key = @remove_char_key ? key.gsub(@remove_char_key_re, "") : key
|
361
456
|
key = @transform_key ? transform(key, @transform_key) : key
|
data/logstash-filter-kv.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-filter-kv'
|
4
|
-
s.version = '4.0
|
4
|
+
s.version = '4.1.0'
|
5
5
|
s.licenses = ['Apache License (2.0)']
|
6
6
|
s.summary = "Parses key-value pairs"
|
7
7
|
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
|
data/spec/filters/kv_spec.rb
CHANGED
@@ -772,3 +772,129 @@ describe LogStash::Filters::KV do
|
|
772
772
|
end
|
773
773
|
end
|
774
774
|
end
|
775
|
+
|
776
|
+
describe "multi character splitting" do
|
777
|
+
subject do
|
778
|
+
plugin = LogStash::Filters::KV.new(options)
|
779
|
+
plugin.register
|
780
|
+
plugin
|
781
|
+
end
|
782
|
+
|
783
|
+
let(:data) { {"message" => message} }
|
784
|
+
let(:event) { LogStash::Event.new(data) }
|
785
|
+
|
786
|
+
shared_examples "parsing all fields and values" do
|
787
|
+
it "parses all fields and values" do
|
788
|
+
subject.filter(event)
|
789
|
+
expect(event.get("hello")).to eq("world")
|
790
|
+
expect(event.get("foo")).to eq("bar")
|
791
|
+
expect(event.get("baz")).to eq("fizz")
|
792
|
+
expect(event.get("doublequoted")).to eq("hello world")
|
793
|
+
expect(event.get("singlequoted")).to eq("hello world")
|
794
|
+
expect(event.get("bracketsone")).to eq("hello world")
|
795
|
+
expect(event.get("bracketstwo")).to eq("hello world")
|
796
|
+
expect(event.get("bracketsthree")).to eq("hello world")
|
797
|
+
end
|
798
|
+
end
|
799
|
+
|
800
|
+
context "empty value_split_pattern" do
|
801
|
+
let(:options) { { "value_split_pattern" => "" } }
|
802
|
+
it "should raise ConfigurationError" do
|
803
|
+
expect{subject}.to raise_error(LogStash::ConfigurationError)
|
804
|
+
end
|
805
|
+
end
|
806
|
+
|
807
|
+
context "empty field_split_pattern" do
|
808
|
+
let(:options) { { "field_split_pattern" => "" } }
|
809
|
+
it "should raise ConfigurationError" do
|
810
|
+
expect{subject}.to raise_error(LogStash::ConfigurationError)
|
811
|
+
end
|
812
|
+
end
|
813
|
+
|
814
|
+
context "single split" do
|
815
|
+
let(:message) { "hello:world foo:bar baz:fizz doublequoted:\"hello world\" singlequoted:'hello world' bracketsone:(hello world) bracketstwo:[hello world] bracketsthree:<hello world>" }
|
816
|
+
let(:options) {
|
817
|
+
{
|
818
|
+
"field_split" => " ",
|
819
|
+
"value_split" => ":",
|
820
|
+
}
|
821
|
+
}
|
822
|
+
it_behaves_like "parsing all fields and values"
|
823
|
+
end
|
824
|
+
|
825
|
+
context "value split multi" do
|
826
|
+
let(:message) { "hello::world foo::bar baz::fizz doublequoted::\"hello world\" singlequoted::'hello world' bracketsone::(hello world) bracketstwo::[hello world] bracketsthree::<hello world>" }
|
827
|
+
let(:options) {
|
828
|
+
{
|
829
|
+
"field_split" => " ",
|
830
|
+
"value_split_pattern" => "::",
|
831
|
+
}
|
832
|
+
}
|
833
|
+
it_behaves_like "parsing all fields and values"
|
834
|
+
end
|
835
|
+
|
836
|
+
context "field and value split multi" do
|
837
|
+
let(:message) { "hello::world__foo::bar__baz::fizz__doublequoted::\"hello world\"__singlequoted::'hello world'__bracketsone::(hello world)__bracketstwo::[hello world]__bracketsthree::<hello world>" }
|
838
|
+
let(:options) {
|
839
|
+
{
|
840
|
+
"field_split_pattern" => "__",
|
841
|
+
"value_split_pattern" => "::",
|
842
|
+
}
|
843
|
+
}
|
844
|
+
it_behaves_like "parsing all fields and values"
|
845
|
+
end
|
846
|
+
|
847
|
+
context "field and value split multi with regex" do
|
848
|
+
let(:message) { "hello:world_foo::bar__baz:::fizz___doublequoted:::\"hello world\"____singlequoted:::::'hello world'____bracketsone:::(hello world)__bracketstwo:[hello world]_bracketsthree::::::<hello world>" }
|
849
|
+
let(:options) {
|
850
|
+
{
|
851
|
+
"field_split_pattern" => "_+",
|
852
|
+
"value_split_pattern" => ":+",
|
853
|
+
}
|
854
|
+
}
|
855
|
+
it_behaves_like "parsing all fields and values"
|
856
|
+
end
|
857
|
+
|
858
|
+
context "field and value split multi using singe char" do
|
859
|
+
let(:message) { "hello:world foo:bar baz:fizz doublequoted:\"hello world\" singlequoted:'hello world' bracketsone:(hello world) bracketstwo:[hello world] bracketsthree:<hello world>" }
|
860
|
+
let(:options) {
|
861
|
+
{
|
862
|
+
"field_split_pattern" => " ",
|
863
|
+
"value_split_pattern" => ":",
|
864
|
+
}
|
865
|
+
}
|
866
|
+
it_behaves_like "parsing all fields and values"
|
867
|
+
end
|
868
|
+
|
869
|
+
context "field and value split multi using escaping" do
|
870
|
+
let(:message) { "hello++world??foo++bar??baz++fizz??doublequoted++\"hello world\"??singlequoted++'hello world'??bracketsone++(hello world)??bracketstwo++[hello world]??bracketsthree++<hello world>" }
|
871
|
+
let(:options) {
|
872
|
+
{
|
873
|
+
"field_split_pattern" => "\\?\\?",
|
874
|
+
"value_split_pattern" => "\\+\\+",
|
875
|
+
}
|
876
|
+
}
|
877
|
+
it_behaves_like "parsing all fields and values"
|
878
|
+
end
|
879
|
+
|
880
|
+
|
881
|
+
context "example from @guyboertje in #15" do
|
882
|
+
let(:message) { 'key1: val1; key2: val2; key3: https://site/?g={......"...; CLR rv:11.0)"..}; key4: val4;' }
|
883
|
+
let(:options) {
|
884
|
+
{
|
885
|
+
"field_split_pattern" => ";\s*(?=key.+?:)|;$",
|
886
|
+
"value_split_pattern" => ":\s+",
|
887
|
+
}
|
888
|
+
}
|
889
|
+
|
890
|
+
it "parses all fields and values" do
|
891
|
+
subject.filter(event)
|
892
|
+
|
893
|
+
expect(event.get("key1")).to eq("val1")
|
894
|
+
expect(event.get("key2")).to eq("val2")
|
895
|
+
expect(event.get("key3")).to eq("https://site/?g={......\"...; CLR rv:11.0)\"..}")
|
896
|
+
expect(event.get("key4")).to eq("val4")
|
897
|
+
end
|
898
|
+
end
|
899
|
+
|
900
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-kv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0
|
4
|
+
version: 4.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -84,7 +84,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
84
84
|
version: '0'
|
85
85
|
requirements: []
|
86
86
|
rubyforge_project:
|
87
|
-
rubygems_version: 2.6.
|
87
|
+
rubygems_version: 2.6.13
|
88
88
|
signing_key:
|
89
89
|
specification_version: 4
|
90
90
|
summary: Parses key-value pairs
|