logstash-filter-kv 4.0.3 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/LICENSE +1 -1
- data/docs/index.asciidoc +40 -2
- data/lib/logstash/filters/kv.rb +105 -10
- data/logstash-filter-kv.gemspec +1 -1
- data/spec/filters/kv_spec.rb +126 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: db1a1524254aafe6af64400a781c90fe02a0e82bcc037d8ce43b652de30dea93
|
4
|
+
data.tar.gz: 36443b889dfde58914512466fe3eadb703fca7c30b665a3d02106f5c6b91cada
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 072403211a806a928e240df180ab2180257b22579db3dd9de9d3ec4065f0e9b75374db3ade99c7e09ad0fc5b66f09701026ee97f81ec3a76c98826b50bd33e94
|
7
|
+
data.tar.gz: 8c7dd7066d669c4f0fea15942f14fc2c80a9412190e31bb0b36cb7382843b3f8839b843b89c72ba930eb04e697e7e5536b01603c78c0666db2b81a49948a0d3d
|
data/CHANGELOG.md
CHANGED
data/LICENSE
CHANGED
data/docs/index.asciidoc
CHANGED
@@ -135,7 +135,7 @@ To exclude `from` and `to`, but retain the `foo` key, you could use this configu
|
|
135
135
|
* Value type is <<string,string>>
|
136
136
|
* Default value is `" "`
|
137
137
|
|
138
|
-
A string of characters to use as delimiters for parsing out key-value pairs.
|
138
|
+
A string of characters to use as single-character field delimiters for parsing out key-value pairs.
|
139
139
|
|
140
140
|
These characters form a regex character class and thus you must escape special regex
|
141
141
|
characters like `[` or `]` using `\`.
|
@@ -160,6 +160,29 @@ fields:
|
|
160
160
|
* `oq: bobo`
|
161
161
|
* `ss: 12345`
|
162
162
|
|
163
|
+
[id="plugins-{type}s-{plugin}-field_split_pattern"]
|
164
|
+
===== `field_split_pattern`
|
165
|
+
|
166
|
+
* Value type is <<string,string>>
|
167
|
+
* There is no default value for this setting.
|
168
|
+
|
169
|
+
A regular expression to use as the field delimiter when parsing out key-value pairs.
|
170
|
+
Useful to define multi-character field delimiters.
|
171
|
+
Setting the `field_split_pattern` option takes precedence over the `field_split` option.
|
172
|
+
|
173
|
+
Note that you should avoid using capturing groups in your regex and you should be
|
174
|
+
cautious with lookaheads or lookbehinds and positional anchors.
|
175
|
+
|
176
|
+
For example, to split fields on a repetition of one or more colons
|
177
|
+
`k1=v1:k2=v2::k3=v3:::k4=v4`:
|
178
|
+
[source,ruby]
|
179
|
+
filter { kv { field_split_pattern => ":+" } }
|
180
|
+
|
181
|
+
To split fields on a regex character that needs escaping, like the plus sign
|
182
|
+
`k1=v1++k2=v2++k3=v3++k4=v4`:
|
183
|
+
[source,ruby]
|
184
|
+
filter { kv { field_split_pattern => "\\+\\+" } }
|
185
|
+
|
163
186
|
[id="plugins-{type}s-{plugin}-include_brackets"]
|
164
187
|
===== `include_brackets`
|
165
188
|
|
@@ -393,7 +416,7 @@ For example, to trim `<`, `>`, `[`, `]` and `,` characters from values:
|
|
393
416
|
* Value type is <<string,string>>
|
394
417
|
* Default value is `"="`
|
395
418
|
|
396
|
-
A non-empty string of characters to use as delimiters for
|
419
|
+
A non-empty string of characters to use as single-character value delimiters for parsing out key-value pairs.
|
397
420
|
|
398
421
|
These characters form a regex character class and thus you must escape special regex
|
399
422
|
characters like `[` or `]` using `\`.
|
@@ -404,6 +427,21 @@ For example, to identify key-values such as
|
|
404
427
|
filter { kv { value_split => ":" } }
|
405
428
|
|
406
429
|
|
430
|
+
[id="plugins-{type}s-{plugin}-value_split_pattern"]
|
431
|
+
===== `value_split_pattern`
|
432
|
+
|
433
|
+
* Value type is <<string,string>>
|
434
|
+
* There is no default value for this setting.
|
435
|
+
|
436
|
+
A regular expression to use as the value delimiter when parsing out key-value pairs.
|
437
|
+
Useful to define multi-character value delimiters.
|
438
|
+
Setting the `value_split_pattern` option takes precedence over the `value_split` option.
|
439
|
+
|
440
|
+
Note that you should avoid using capturing groups in your regex and you should be
|
441
|
+
cautious with lookaheads or lookbehinds and positional anchors.
|
442
|
+
|
443
|
+
See `field_split_pattern` for examples.
|
444
|
+
|
407
445
|
|
408
446
|
[id="plugins-{type}s-{plugin}-common-options"]
|
409
447
|
include::{include_path}/{type}.asciidoc[]
|
data/lib/logstash/filters/kv.rb
CHANGED
@@ -123,7 +123,7 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
123
123
|
# }
|
124
124
|
config :transform_key, :validate => [TRANSFORM_LOWERCASE_KEY, TRANSFORM_UPPERCASE_KEY, TRANSFORM_CAPITALIZE_KEY]
|
125
125
|
|
126
|
-
# A string of characters to use as delimiters for parsing out key-value pairs.
|
126
|
+
# A string of characters to use as single-character field delimiters for parsing out key-value pairs.
|
127
127
|
#
|
128
128
|
# These characters form a regex character class and thus you must escape special regex
|
129
129
|
# characters like `[` or `]` using `\`.
|
@@ -149,8 +149,25 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
149
149
|
# * `ss: 12345`
|
150
150
|
config :field_split, :validate => :string, :default => ' '
|
151
151
|
|
152
|
+
# A regular expression to use as the field delimiter when parsing out key-value pairs.
|
153
|
+
# Useful to define multi-character field delimiters.
|
154
|
+
# Setting the field_split_pattern option takes precedence over the field_split option.
|
155
|
+
#
|
156
|
+
# Note that you should avoid using capturing groups in your regex and you should be
|
157
|
+
# cautious with lookaheads or lookbehinds and positional anchors.
|
158
|
+
#
|
159
|
+
# For example, to split fields on a repetition of one or more colons
|
160
|
+
# `k1=v1:k2=v2::k3=v3:::k4=v4`:
|
161
|
+
# [source,ruby]
|
162
|
+
# filter { kv { field_split_pattern => ":+" } }
|
163
|
+
#
|
164
|
+
# To split fields on a regex character that needs escaping, like the plus sign
|
165
|
+
# `k1=v1++k2=v2++k3=v3++k4=v4`:
|
166
|
+
# [source,ruby]
|
167
|
+
# filter { kv { field_split_pattern => "\\+\\+" } }
|
168
|
+
config :field_split_pattern, :validate => :string
|
152
169
|
|
153
|
-
# A non-empty string of characters to use as delimiters for
|
170
|
+
# A non-empty string of characters to use as single-character value delimiters for parsing out key-value pairs.
|
154
171
|
#
|
155
172
|
# These characters form a regex character class and thus you must escape special regex
|
156
173
|
# characters like `[` or `]` using `\`.
|
@@ -161,6 +178,16 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
161
178
|
# filter { kv { value_split => ":" } }
|
162
179
|
config :value_split, :validate => :string, :default => '='
|
163
180
|
|
181
|
+
# A regular expression to use as the value delimiter when parsing out key-value pairs.
|
182
|
+
# Useful to define multi-character value delimiters.
|
183
|
+
# Setting the value_split_pattern option takes precedence over the value_split option.
|
184
|
+
#
|
185
|
+
# Note that you should avoid using capturing groups in your regex and you should be
|
186
|
+
# cautious with lookaheads or lookbehinds and positional anchors.
|
187
|
+
#
|
188
|
+
# See field_split_pattern for examples.
|
189
|
+
config :value_split_pattern, :validate => :string
|
190
|
+
|
164
191
|
# A string to prepend to all of the extracted keys.
|
165
192
|
#
|
166
193
|
# For example, to prepend arg_ to all keys:
|
@@ -279,24 +306,70 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
279
306
|
def register
|
280
307
|
if @value_split.empty?
|
281
308
|
raise LogStash::ConfigurationError, I18n.t(
|
282
|
-
"logstash.
|
309
|
+
"logstash.runner.configuration.invalid_plugin_register",
|
283
310
|
:plugin => "filter",
|
284
311
|
:type => "kv",
|
285
312
|
:error => "Configuration option 'value_split' must be a non-empty string"
|
286
313
|
)
|
287
314
|
end
|
288
315
|
|
316
|
+
if @field_split_pattern && @field_split_pattern.empty?
|
317
|
+
raise LogStash::ConfigurationError, I18n.t(
|
318
|
+
"logstash.runner.configuration.invalid_plugin_register",
|
319
|
+
:plugin => "filter",
|
320
|
+
:type => "kv",
|
321
|
+
:error => "Configuration option 'field_split_pattern' must be a non-empty string"
|
322
|
+
)
|
323
|
+
end
|
324
|
+
|
325
|
+
if @value_split_pattern && @value_split_pattern.empty?
|
326
|
+
raise LogStash::ConfigurationError, I18n.t(
|
327
|
+
"logstash.runner.configuration.invalid_plugin_register",
|
328
|
+
:plugin => "filter",
|
329
|
+
:type => "kv",
|
330
|
+
:error => "Configuration option 'value_split_pattern' must be a non-empty string"
|
331
|
+
)
|
332
|
+
end
|
333
|
+
|
289
334
|
@trim_value_re = Regexp.new("^[#{@trim_value}]|[#{@trim_value}]$") if @trim_value
|
290
335
|
@trim_key_re = Regexp.new("^[#{@trim_key}]|[#{@trim_key}]$") if @trim_key
|
291
336
|
|
292
337
|
@remove_char_value_re = Regexp.new("[#{@remove_char_value}]") if @remove_char_value
|
293
338
|
@remove_char_key_re = Regexp.new("[#{@remove_char_key}]") if @remove_char_key
|
294
339
|
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
340
|
+
field_split = Regexp::compile(@field_split_pattern || /[#{@field_split}]/)
|
341
|
+
value_split = Regexp::compile(@value_split_pattern || /[#{@value_split}]/)
|
342
|
+
|
343
|
+
optional_whitespace = /\s*/
|
344
|
+
eof = /$/
|
345
|
+
|
346
|
+
value_pattern = begin
|
347
|
+
# each component expression within value_pattern _must_ capture exactly once.
|
348
|
+
value_patterns = []
|
349
|
+
|
350
|
+
value_patterns << quoted_capture(%q(")) # quoted double
|
351
|
+
value_patterns << quoted_capture(%q(')) # quoted single
|
352
|
+
if @include_brackets
|
353
|
+
value_patterns << quoted_capture('(', ')') # bracketed paren
|
354
|
+
value_patterns << quoted_capture('[', ']') # bracketed square
|
355
|
+
value_patterns << quoted_capture('<', '>') # bracketed angle
|
356
|
+
end
|
357
|
+
|
358
|
+
# an unquoted value is a _captured_ sequence of characters or escaped spaces before a `field_split` or EOF.
|
359
|
+
value_patterns << /((?:\\ |.)+?)(?=#{Regexp::union(field_split, eof)})/
|
360
|
+
|
361
|
+
Regexp.union(*value_patterns)
|
362
|
+
end
|
363
|
+
|
364
|
+
|
365
|
+
# a key is a _captured_ sequence of characters or escaped spaces before optional whitespace
|
366
|
+
# and followed by either a `value_split`, a `field_split`, or EOF.
|
367
|
+
key_pattern = /((?:\\ |.)+?)(?=#{optional_whitespace}#{Regexp::union(value_split, field_split, eof)})/
|
368
|
+
|
369
|
+
@scan_re = /#{field_split}?#{key_pattern}#{optional_whitespace}(?:#{value_split}#{optional_whitespace}#{value_pattern})?(?=#{Regexp::union(field_split, eof)})/
|
370
|
+
@value_split_re = value_split
|
371
|
+
|
372
|
+
@logger.debug? && @logger.debug("KV scan regex", :regex => @scan_re.inspect)
|
300
373
|
end
|
301
374
|
|
302
375
|
def filter(event)
|
@@ -335,6 +408,26 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
335
408
|
s =~ @value_split_re
|
336
409
|
end
|
337
410
|
|
411
|
+
# Helper function for generating single-capture `Regexp` that, when matching a string bound by the given quotes
|
412
|
+
# or brackets, will capture the content that is between the quotes or brackets.
|
413
|
+
#
|
414
|
+
# @api private
|
415
|
+
# @param quote_sequence [String] a character sequence that begins a quoted expression
|
416
|
+
# @param close_quote_sequence [String] a character sequence that ends a quoted expression; (default: quote_sequence)
|
417
|
+
# @return [Regexp] with a single capture group representing content that is between the given quotes
|
418
|
+
def quoted_capture(quote_sequence, close_quote_sequence=quote_sequence)
|
419
|
+
fail('quote_sequence must be non-empty!') if quote_sequence.nil? || quote_sequence.empty?
|
420
|
+
fail('close_quote_sequence must be non-empty!') if close_quote_sequence.nil? || close_quote_sequence.empty?
|
421
|
+
|
422
|
+
open_pattern = /#{Regexp.quote(quote_sequence)}/
|
423
|
+
close_pattern = /#{Regexp.quote(close_quote_sequence)}/
|
424
|
+
|
425
|
+
# matches a sequence of zero or more characters that is followed by the `close_quote_sequence`
|
426
|
+
quoted_value_pattern = /(?:.)*?(?=#{Regexp.quote(close_quote_sequence)})/
|
427
|
+
|
428
|
+
/#{open_pattern}(#{quoted_value_pattern})#{close_pattern}/
|
429
|
+
end
|
430
|
+
|
338
431
|
def transform(text, method)
|
339
432
|
case method
|
340
433
|
when TRANSFORM_LOWERCASE_KEY
|
@@ -354,8 +447,10 @@ class LogStash::Filters::KV < LogStash::Filters::Base
|
|
354
447
|
include_keys = @include_keys.map{|key| event.sprintf(key)}
|
355
448
|
exclude_keys = @exclude_keys.map{|key| event.sprintf(key)}
|
356
449
|
|
357
|
-
text.scan(@scan_re) do |key,
|
358
|
-
value =
|
450
|
+
text.scan(@scan_re) do |key, *value_candidates|
|
451
|
+
value = value_candidates.compact.first
|
452
|
+
next if value.nil? || value.empty?
|
453
|
+
|
359
454
|
key = @trim_key ? key.gsub(@trim_key_re, "") : key
|
360
455
|
key = @remove_char_key ? key.gsub(@remove_char_key_re, "") : key
|
361
456
|
key = @transform_key ? transform(key, @transform_key) : key
|
data/logstash-filter-kv.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-filter-kv'
|
4
|
-
s.version = '4.0
|
4
|
+
s.version = '4.1.0'
|
5
5
|
s.licenses = ['Apache License (2.0)']
|
6
6
|
s.summary = "Parses key-value pairs"
|
7
7
|
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
|
data/spec/filters/kv_spec.rb
CHANGED
@@ -772,3 +772,129 @@ describe LogStash::Filters::KV do
|
|
772
772
|
end
|
773
773
|
end
|
774
774
|
end
|
775
|
+
|
776
|
+
describe "multi character splitting" do
|
777
|
+
subject do
|
778
|
+
plugin = LogStash::Filters::KV.new(options)
|
779
|
+
plugin.register
|
780
|
+
plugin
|
781
|
+
end
|
782
|
+
|
783
|
+
let(:data) { {"message" => message} }
|
784
|
+
let(:event) { LogStash::Event.new(data) }
|
785
|
+
|
786
|
+
shared_examples "parsing all fields and values" do
|
787
|
+
it "parses all fields and values" do
|
788
|
+
subject.filter(event)
|
789
|
+
expect(event.get("hello")).to eq("world")
|
790
|
+
expect(event.get("foo")).to eq("bar")
|
791
|
+
expect(event.get("baz")).to eq("fizz")
|
792
|
+
expect(event.get("doublequoted")).to eq("hello world")
|
793
|
+
expect(event.get("singlequoted")).to eq("hello world")
|
794
|
+
expect(event.get("bracketsone")).to eq("hello world")
|
795
|
+
expect(event.get("bracketstwo")).to eq("hello world")
|
796
|
+
expect(event.get("bracketsthree")).to eq("hello world")
|
797
|
+
end
|
798
|
+
end
|
799
|
+
|
800
|
+
context "empty value_split_pattern" do
|
801
|
+
let(:options) { { "value_split_pattern" => "" } }
|
802
|
+
it "should raise ConfigurationError" do
|
803
|
+
expect{subject}.to raise_error(LogStash::ConfigurationError)
|
804
|
+
end
|
805
|
+
end
|
806
|
+
|
807
|
+
context "empty field_split_pattern" do
|
808
|
+
let(:options) { { "field_split_pattern" => "" } }
|
809
|
+
it "should raise ConfigurationError" do
|
810
|
+
expect{subject}.to raise_error(LogStash::ConfigurationError)
|
811
|
+
end
|
812
|
+
end
|
813
|
+
|
814
|
+
context "single split" do
|
815
|
+
let(:message) { "hello:world foo:bar baz:fizz doublequoted:\"hello world\" singlequoted:'hello world' bracketsone:(hello world) bracketstwo:[hello world] bracketsthree:<hello world>" }
|
816
|
+
let(:options) {
|
817
|
+
{
|
818
|
+
"field_split" => " ",
|
819
|
+
"value_split" => ":",
|
820
|
+
}
|
821
|
+
}
|
822
|
+
it_behaves_like "parsing all fields and values"
|
823
|
+
end
|
824
|
+
|
825
|
+
context "value split multi" do
|
826
|
+
let(:message) { "hello::world foo::bar baz::fizz doublequoted::\"hello world\" singlequoted::'hello world' bracketsone::(hello world) bracketstwo::[hello world] bracketsthree::<hello world>" }
|
827
|
+
let(:options) {
|
828
|
+
{
|
829
|
+
"field_split" => " ",
|
830
|
+
"value_split_pattern" => "::",
|
831
|
+
}
|
832
|
+
}
|
833
|
+
it_behaves_like "parsing all fields and values"
|
834
|
+
end
|
835
|
+
|
836
|
+
context "field and value split multi" do
|
837
|
+
let(:message) { "hello::world__foo::bar__baz::fizz__doublequoted::\"hello world\"__singlequoted::'hello world'__bracketsone::(hello world)__bracketstwo::[hello world]__bracketsthree::<hello world>" }
|
838
|
+
let(:options) {
|
839
|
+
{
|
840
|
+
"field_split_pattern" => "__",
|
841
|
+
"value_split_pattern" => "::",
|
842
|
+
}
|
843
|
+
}
|
844
|
+
it_behaves_like "parsing all fields and values"
|
845
|
+
end
|
846
|
+
|
847
|
+
context "field and value split multi with regex" do
|
848
|
+
let(:message) { "hello:world_foo::bar__baz:::fizz___doublequoted:::\"hello world\"____singlequoted:::::'hello world'____bracketsone:::(hello world)__bracketstwo:[hello world]_bracketsthree::::::<hello world>" }
|
849
|
+
let(:options) {
|
850
|
+
{
|
851
|
+
"field_split_pattern" => "_+",
|
852
|
+
"value_split_pattern" => ":+",
|
853
|
+
}
|
854
|
+
}
|
855
|
+
it_behaves_like "parsing all fields and values"
|
856
|
+
end
|
857
|
+
|
858
|
+
context "field and value split multi using singe char" do
|
859
|
+
let(:message) { "hello:world foo:bar baz:fizz doublequoted:\"hello world\" singlequoted:'hello world' bracketsone:(hello world) bracketstwo:[hello world] bracketsthree:<hello world>" }
|
860
|
+
let(:options) {
|
861
|
+
{
|
862
|
+
"field_split_pattern" => " ",
|
863
|
+
"value_split_pattern" => ":",
|
864
|
+
}
|
865
|
+
}
|
866
|
+
it_behaves_like "parsing all fields and values"
|
867
|
+
end
|
868
|
+
|
869
|
+
context "field and value split multi using escaping" do
|
870
|
+
let(:message) { "hello++world??foo++bar??baz++fizz??doublequoted++\"hello world\"??singlequoted++'hello world'??bracketsone++(hello world)??bracketstwo++[hello world]??bracketsthree++<hello world>" }
|
871
|
+
let(:options) {
|
872
|
+
{
|
873
|
+
"field_split_pattern" => "\\?\\?",
|
874
|
+
"value_split_pattern" => "\\+\\+",
|
875
|
+
}
|
876
|
+
}
|
877
|
+
it_behaves_like "parsing all fields and values"
|
878
|
+
end
|
879
|
+
|
880
|
+
|
881
|
+
context "example from @guyboertje in #15" do
|
882
|
+
let(:message) { 'key1: val1; key2: val2; key3: https://site/?g={......"...; CLR rv:11.0)"..}; key4: val4;' }
|
883
|
+
let(:options) {
|
884
|
+
{
|
885
|
+
"field_split_pattern" => ";\s*(?=key.+?:)|;$",
|
886
|
+
"value_split_pattern" => ":\s+",
|
887
|
+
}
|
888
|
+
}
|
889
|
+
|
890
|
+
it "parses all fields and values" do
|
891
|
+
subject.filter(event)
|
892
|
+
|
893
|
+
expect(event.get("key1")).to eq("val1")
|
894
|
+
expect(event.get("key2")).to eq("val2")
|
895
|
+
expect(event.get("key3")).to eq("https://site/?g={......\"...; CLR rv:11.0)\"..}")
|
896
|
+
expect(event.get("key4")).to eq("val4")
|
897
|
+
end
|
898
|
+
end
|
899
|
+
|
900
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-kv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0
|
4
|
+
version: 4.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -84,7 +84,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
84
84
|
version: '0'
|
85
85
|
requirements: []
|
86
86
|
rubyforge_project:
|
87
|
-
rubygems_version: 2.6.
|
87
|
+
rubygems_version: 2.6.13
|
88
88
|
signing_key:
|
89
89
|
specification_version: 4
|
90
90
|
summary: Parses key-value pairs
|