logstash-filter-kv 4.0.3 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3020b8c8f8399dfaaf9533b83ed66af6f90ab9843476d45e934feba3f48acea9
4
- data.tar.gz: f5bde51c1f94ef0c376d4c1f9754eaca9da89daca9171326e16e2cd11db25fbe
3
+ metadata.gz: db1a1524254aafe6af64400a781c90fe02a0e82bcc037d8ce43b652de30dea93
4
+ data.tar.gz: 36443b889dfde58914512466fe3eadb703fca7c30b665a3d02106f5c6b91cada
5
5
  SHA512:
6
- metadata.gz: 40b0548bdf2a9337529f9078fb330582a98860d35672fcd826f6e2d7356b8f0add71186e7ae98022441260f9788bf727318263dfdd649761ea9ec1a4e2e9b2eb
7
- data.tar.gz: 35f1bc9f1af780f015946a7101392b7dc0a8c72c5a28e45edff46aa2cc40dab1c3b7d590b7c71c45d354a5c0814034e116411392432d73f7b3fbfa6f7e12aed9
6
+ metadata.gz: 072403211a806a928e240df180ab2180257b22579db3dd9de9d3ec4065f0e9b75374db3ade99c7e09ad0fc5b66f09701026ee97f81ec3a76c98826b50bd33e94
7
+ data.tar.gz: 8c7dd7066d669c4f0fea15942f14fc2c80a9412190e31bb0b36cb7382843b3f8839b843b89c72ba930eb04e697e7e5536b01603c78c0666db2b81a49948a0d3d
@@ -1,3 +1,6 @@
1
+ ## 4.1.0
2
+ - feature: add option to split fields and values using a regex pattern (#55)
3
+
1
4
  ## 4.0.3
2
5
  - Update gemspec summary
3
6
 
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2012–2016 Elasticsearch <http://www.elastic.co>
1
+ Copyright (c) 2012-2018 Elasticsearch <http://www.elastic.co>
2
2
 
3
3
  Licensed under the Apache License, Version 2.0 (the "License");
4
4
  you may not use this file except in compliance with the License.
@@ -135,7 +135,7 @@ To exclude `from` and `to`, but retain the `foo` key, you could use this configu
135
135
  * Value type is <<string,string>>
136
136
  * Default value is `" "`
137
137
 
138
- A string of characters to use as delimiters for parsing out key-value pairs.
138
+ A string of characters to use as single-character field delimiters for parsing out key-value pairs.
139
139
 
140
140
  These characters form a regex character class and thus you must escape special regex
141
141
  characters like `[` or `]` using `\`.
@@ -160,6 +160,29 @@ fields:
160
160
  * `oq: bobo`
161
161
  * `ss: 12345`
162
162
 
163
+ [id="plugins-{type}s-{plugin}-field_split_pattern"]
164
+ ===== `field_split_pattern`
165
+
166
+ * Value type is <<string,string>>
167
+ * There is no default value for this setting.
168
+
169
+ A regular expression to use as the field delimiter for parsing out key-value pairs.
170
+ Useful to define multi-character field delimiters.
171
+ Setting the `field_split_pattern` option will take precedence over the `field_split` option.
172
+
173
+ Note that you should avoid using capturing groups in your regex and you should be
174
+ cautious with lookaheads or lookbehinds and positional anchors.
175
+
176
+ For example, to split fields on a repetition of one or more colons
177
+ `k1=v1:k2=v2::k3=v3:::k4=v4`:
178
+ [source,ruby]
179
+ filter { kv { field_split_pattern => ":+" } }
180
+
181
+ To split fields on a regex character that needs escaping, like the plus sign
182
+ `k1=v1++k2=v2++k3=v3++k4=v4`:
183
+ [source,ruby]
184
+ filter { kv { field_split_pattern => "\\+\\+" } }
185
+
163
186
  [id="plugins-{type}s-{plugin}-include_brackets"]
164
187
  ===== `include_brackets`
165
188
 
@@ -393,7 +416,7 @@ For example, to trim `<`, `>`, `[`, `]` and `,` characters from values:
393
416
  * Value type is <<string,string>>
394
417
  * Default value is `"="`
395
418
 
396
- A non-empty string of characters to use as delimiters for identifying key-value relations.
419
+ A non-empty string of characters to use as single-character value delimiters for parsing out key-value pairs.
397
420
 
398
421
  These characters form a regex character class and thus you must escape special regex
399
422
  characters like `[` or `]` using `\`.
@@ -404,6 +427,21 @@ For example, to identify key-values such as
404
427
  filter { kv { value_split => ":" } }
405
428
 
406
429
 
430
+ [id="plugins-{type}s-{plugin}-value_split_pattern"]
431
+ ===== `value_split_pattern`
432
+
433
+ * Value type is <<string,string>>
434
+ * There is no default value for this setting.
435
+
436
+ A regular expression to use as the value delimiter for parsing out key-value pairs.
437
+ Useful to define multi-character value delimiters.
438
+ Setting the `value_split_pattern` option will take precedence over the `value_split` option.
439
+
440
+ Note that you should avoid using capturing groups in your regex and you should be
441
+ cautious with lookaheads or lookbehinds and positional anchors.
442
+
443
+ See `field_split_pattern` for examples.
444
+
407
445
 
408
446
  [id="plugins-{type}s-{plugin}-common-options"]
409
447
  include::{include_path}/{type}.asciidoc[]
@@ -123,7 +123,7 @@ class LogStash::Filters::KV < LogStash::Filters::Base
123
123
  # }
124
124
  config :transform_key, :validate => [TRANSFORM_LOWERCASE_KEY, TRANSFORM_UPPERCASE_KEY, TRANSFORM_CAPITALIZE_KEY]
125
125
 
126
- # A string of characters to use as delimiters for parsing out key-value pairs.
126
+ # A string of characters to use as single-character field delimiters for parsing out key-value pairs.
127
127
  #
128
128
  # These characters form a regex character class and thus you must escape special regex
129
129
  # characters like `[` or `]` using `\`.
@@ -149,8 +149,25 @@ class LogStash::Filters::KV < LogStash::Filters::Base
149
149
  # * `ss: 12345`
150
150
  config :field_split, :validate => :string, :default => ' '
151
151
 
152
+ # A regex expression to use as field delimiter for parsing out key-value pairs.
153
+ # Useful to define multi-character field delimiters.
154
+ # Setting the field_split_pattern option will take precedence over the field_split option.
155
+ #
156
+ # Note that you should avoid using capturing groups in your regex and you should be
157
+ # cautious with lookaheads or lookbehinds and positional anchors.
158
+ #
159
+ # For example, to split fields on a repetition of one or more colons
160
+ # `k1=v1:k2=v2::k3=v3:::k4=v4`:
161
+ # [source,ruby]
162
+ # filter { kv { field_split_pattern => ":+" } }
163
+ #
164
+ # To split fields on a regex character that needs escaping, like the plus sign
165
+ # `k1=v1++k2=v2++k3=v3++k4=v4`:
166
+ # [source,ruby]
167
+ # filter { kv { field_split_pattern => "\\+\\+" } }
168
+ config :field_split_pattern, :validate => :string
152
169
 
153
- # A non-empty string of characters to use as delimiters for identifying key-value relations.
170
+ # A non-empty string of characters to use as single-character value delimiters for parsing out key-value pairs.
154
171
  #
155
172
  # These characters form a regex character class and thus you must escape special regex
156
173
  # characters like `[` or `]` using `\`.
@@ -161,6 +178,16 @@ class LogStash::Filters::KV < LogStash::Filters::Base
161
178
  # filter { kv { value_split => ":" } }
162
179
  config :value_split, :validate => :string, :default => '='
163
180
 
181
+ # A regex expression to use as value delimiter for parsing out key-value pairs.
182
+ # Useful to define multi-character value delimiters.
183
+ # Setting the value_split_pattern option will take precedence over the value_split option.
184
+ #
185
+ # Note that you should avoid using capturing groups in your regex and you should be
186
+ # cautious with lookaheads or lookbehinds and positional anchors.
187
+ #
188
+ # See field_split_pattern for examples.
189
+ config :value_split_pattern, :validate => :string
190
+
164
191
  # A string to prepend to all of the extracted keys.
165
192
  #
166
193
  # For example, to prepend arg_ to all keys:
@@ -279,24 +306,70 @@ class LogStash::Filters::KV < LogStash::Filters::Base
279
306
  def register
280
307
  if @value_split.empty?
281
308
  raise LogStash::ConfigurationError, I18n.t(
282
- "logstash.agent.configuration.invalid_plugin_register",
309
+ "logstash.runner.configuration.invalid_plugin_register",
283
310
  :plugin => "filter",
284
311
  :type => "kv",
285
312
  :error => "Configuration option 'value_split' must be a non-empty string"
286
313
  )
287
314
  end
288
315
 
316
+ if @field_split_pattern && @field_split_pattern.empty?
317
+ raise LogStash::ConfigurationError, I18n.t(
318
+ "logstash.runner.configuration.invalid_plugin_register",
319
+ :plugin => "filter",
320
+ :type => "kv",
321
+ :error => "Configuration option 'field_split_pattern' must be a non-empty string"
322
+ )
323
+ end
324
+
325
+ if @value_split_pattern && @value_split_pattern.empty?
326
+ raise LogStash::ConfigurationError, I18n.t(
327
+ "logstash.runner.configuration.invalid_plugin_register",
328
+ :plugin => "filter",
329
+ :type => "kv",
330
+ :error => "Configuration option 'value_split_pattern' must be a non-empty string"
331
+ )
332
+ end
333
+
289
334
  @trim_value_re = Regexp.new("^[#{@trim_value}]|[#{@trim_value}]$") if @trim_value
290
335
  @trim_key_re = Regexp.new("^[#{@trim_key}]|[#{@trim_key}]$") if @trim_key
291
336
 
292
337
  @remove_char_value_re = Regexp.new("[#{@remove_char_value}]") if @remove_char_value
293
338
  @remove_char_key_re = Regexp.new("[#{@remove_char_key}]") if @remove_char_key
294
339
 
295
- valueRxString = "(?:\"([^\"]+)\"|'([^']+)'"
296
- valueRxString += "|\\(([^\\)]+)\\)|\\[([^\\]]+)\\]|<([^>]+)>" if @include_brackets
297
- valueRxString += "|((?:\\\\ |[^" + @field_split + "])+))"
298
- @scan_re = Regexp.new("((?:\\\\ |[^" + @field_split + @value_split + "])+)\s*[" + @value_split + "]\s*" + valueRxString)
299
- @value_split_re = /[#{@value_split}]/
340
+ field_split = Regexp::compile(@field_split_pattern || /[#{@field_split}]/)
341
+ value_split = Regexp::compile(@value_split_pattern || /[#{@value_split}]/)
342
+
343
+ optional_whitespace = /\s*/
344
+ eof = /$/
345
+
346
+ value_pattern = begin
347
+ # each component expression within value_pattern _must_ capture exactly once.
348
+ value_patterns = []
349
+
350
+ value_patterns << quoted_capture(%q(")) # quoted double
351
+ value_patterns << quoted_capture(%q(')) # quoted single
352
+ if @include_brackets
353
+ value_patterns << quoted_capture('(', ')') # bracketed paren
354
+ value_patterns << quoted_capture('[', ']') # bracketed square
355
+ value_patterns << quoted_capture('<', '>') # bracketed angle
356
+ end
357
+
358
+ # an unquoted value is a _captured_ sequence of characters or escaped spaces before a `field_split` or EOF.
359
+ value_patterns << /((?:\\ |.)+?)(?=#{Regexp::union(field_split, eof)})/
360
+
361
+ Regexp.union(*value_patterns)
362
+ end
363
+
364
+
365
+ # a key is a _captured_ sequence of characters or escaped spaces before optional whitespace
366
+ # and followed by either a `value_split`, a `field_split`, or EOF.
367
+ key_pattern = /((?:\\ |.)+?)(?=#{optional_whitespace}#{Regexp::union(value_split, field_split, eof)})/
368
+
369
+ @scan_re = /#{field_split}?#{key_pattern}#{optional_whitespace}(?:#{value_split}#{optional_whitespace}#{value_pattern})?(?=#{Regexp::union(field_split, eof)})/
370
+ @value_split_re = value_split
371
+
372
+ @logger.debug? && @logger.debug("KV scan regex", :regex => @scan_re.inspect)
300
373
  end
301
374
 
302
375
  def filter(event)
@@ -335,6 +408,26 @@ class LogStash::Filters::KV < LogStash::Filters::Base
335
408
  s =~ @value_split_re
336
409
  end
337
410
 
411
+ # Helper function for generating single-capture `Regexp` that, when matching a string bound by the given quotes
412
+ # or brackets, will capture the content that is between the quotes or brackets.
413
+ #
414
+ # @api private
415
+ # @param quote_sequence [String] a character sequence that begins a quoted expression
416
+ # @param close_quote_sequence [String] a character sequence that ends a quoted expression; (default: quote_sequence)
417
+ # @return [Regexp] with a single capture group representing content that is between the given quotes
418
+ def quoted_capture(quote_sequence, close_quote_sequence=quote_sequence)
419
+ fail('quote_sequence must be non-empty!') if quote_sequence.nil? || quote_sequence.empty?
420
+ fail('close_quote_sequence must be non-empty!') if close_quote_sequence.nil? || close_quote_sequence.empty?
421
+
422
+ open_pattern = /#{Regexp.quote(quote_sequence)}/
423
+ close_pattern = /#{Regexp.quote(close_quote_sequence)}/
424
+
425
+ # matches a sequence of zero or more characters that is followed by the `close_quote_sequence`
426
+ quoted_value_pattern = /(?:.)*?(?=#{Regexp.quote(close_quote_sequence)})/
427
+
428
+ /#{open_pattern}(#{quoted_value_pattern})#{close_pattern}/
429
+ end
430
+
338
431
  def transform(text, method)
339
432
  case method
340
433
  when TRANSFORM_LOWERCASE_KEY
@@ -354,8 +447,10 @@ class LogStash::Filters::KV < LogStash::Filters::Base
354
447
  include_keys = @include_keys.map{|key| event.sprintf(key)}
355
448
  exclude_keys = @exclude_keys.map{|key| event.sprintf(key)}
356
449
 
357
- text.scan(@scan_re) do |key, v1, v2, v3, v4, v5, v6|
358
- value = v1 || v2 || v3 || v4 || v5 || v6
450
+ text.scan(@scan_re) do |key, *value_candidates|
451
+ value = value_candidates.compact.first
452
+ next if value.nil? || value.empty?
453
+
359
454
  key = @trim_key ? key.gsub(@trim_key_re, "") : key
360
455
  key = @remove_char_key ? key.gsub(@remove_char_key_re, "") : key
361
456
  key = @transform_key ? transform(key, @transform_key) : key
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-kv'
4
- s.version = '4.0.3'
4
+ s.version = '4.1.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Parses key-value pairs"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -772,3 +772,129 @@ describe LogStash::Filters::KV do
772
772
  end
773
773
  end
774
774
  end
775
+
776
+ describe "multi character splitting" do
777
+ subject do
778
+ plugin = LogStash::Filters::KV.new(options)
779
+ plugin.register
780
+ plugin
781
+ end
782
+
783
+ let(:data) { {"message" => message} }
784
+ let(:event) { LogStash::Event.new(data) }
785
+
786
+ shared_examples "parsing all fields and values" do
787
+ it "parses all fields and values" do
788
+ subject.filter(event)
789
+ expect(event.get("hello")).to eq("world")
790
+ expect(event.get("foo")).to eq("bar")
791
+ expect(event.get("baz")).to eq("fizz")
792
+ expect(event.get("doublequoted")).to eq("hello world")
793
+ expect(event.get("singlequoted")).to eq("hello world")
794
+ expect(event.get("bracketsone")).to eq("hello world")
795
+ expect(event.get("bracketstwo")).to eq("hello world")
796
+ expect(event.get("bracketsthree")).to eq("hello world")
797
+ end
798
+ end
799
+
800
+ context "empty value_split_pattern" do
801
+ let(:options) { { "value_split_pattern" => "" } }
802
+ it "should raise ConfigurationError" do
803
+ expect{subject}.to raise_error(LogStash::ConfigurationError)
804
+ end
805
+ end
806
+
807
+ context "empty field_split_pattern" do
808
+ let(:options) { { "field_split_pattern" => "" } }
809
+ it "should raise ConfigurationError" do
810
+ expect{subject}.to raise_error(LogStash::ConfigurationError)
811
+ end
812
+ end
813
+
814
+ context "single split" do
815
+ let(:message) { "hello:world foo:bar baz:fizz doublequoted:\"hello world\" singlequoted:'hello world' bracketsone:(hello world) bracketstwo:[hello world] bracketsthree:<hello world>" }
816
+ let(:options) {
817
+ {
818
+ "field_split" => " ",
819
+ "value_split" => ":",
820
+ }
821
+ }
822
+ it_behaves_like "parsing all fields and values"
823
+ end
824
+
825
+ context "value split multi" do
826
+ let(:message) { "hello::world foo::bar baz::fizz doublequoted::\"hello world\" singlequoted::'hello world' bracketsone::(hello world) bracketstwo::[hello world] bracketsthree::<hello world>" }
827
+ let(:options) {
828
+ {
829
+ "field_split" => " ",
830
+ "value_split_pattern" => "::",
831
+ }
832
+ }
833
+ it_behaves_like "parsing all fields and values"
834
+ end
835
+
836
+ context "field and value split multi" do
837
+ let(:message) { "hello::world__foo::bar__baz::fizz__doublequoted::\"hello world\"__singlequoted::'hello world'__bracketsone::(hello world)__bracketstwo::[hello world]__bracketsthree::<hello world>" }
838
+ let(:options) {
839
+ {
840
+ "field_split_pattern" => "__",
841
+ "value_split_pattern" => "::",
842
+ }
843
+ }
844
+ it_behaves_like "parsing all fields and values"
845
+ end
846
+
847
+ context "field and value split multi with regex" do
848
+ let(:message) { "hello:world_foo::bar__baz:::fizz___doublequoted:::\"hello world\"____singlequoted:::::'hello world'____bracketsone:::(hello world)__bracketstwo:[hello world]_bracketsthree::::::<hello world>" }
849
+ let(:options) {
850
+ {
851
+ "field_split_pattern" => "_+",
852
+ "value_split_pattern" => ":+",
853
+ }
854
+ }
855
+ it_behaves_like "parsing all fields and values"
856
+ end
857
+
858
+ context "field and value split multi using singe char" do
859
+ let(:message) { "hello:world foo:bar baz:fizz doublequoted:\"hello world\" singlequoted:'hello world' bracketsone:(hello world) bracketstwo:[hello world] bracketsthree:<hello world>" }
860
+ let(:options) {
861
+ {
862
+ "field_split_pattern" => " ",
863
+ "value_split_pattern" => ":",
864
+ }
865
+ }
866
+ it_behaves_like "parsing all fields and values"
867
+ end
868
+
869
+ context "field and value split multi using escaping" do
870
+ let(:message) { "hello++world??foo++bar??baz++fizz??doublequoted++\"hello world\"??singlequoted++'hello world'??bracketsone++(hello world)??bracketstwo++[hello world]??bracketsthree++<hello world>" }
871
+ let(:options) {
872
+ {
873
+ "field_split_pattern" => "\\?\\?",
874
+ "value_split_pattern" => "\\+\\+",
875
+ }
876
+ }
877
+ it_behaves_like "parsing all fields and values"
878
+ end
879
+
880
+
881
+ context "example from @guyboertje in #15" do
882
+ let(:message) { 'key1: val1; key2: val2; key3: https://site/?g={......"...; CLR rv:11.0)"..}; key4: val4;' }
883
+ let(:options) {
884
+ {
885
+ "field_split_pattern" => ";\s*(?=key.+?:)|;$",
886
+ "value_split_pattern" => ":\s+",
887
+ }
888
+ }
889
+
890
+ it "parses all fields and values" do
891
+ subject.filter(event)
892
+
893
+ expect(event.get("key1")).to eq("val1")
894
+ expect(event.get("key2")).to eq("val2")
895
+ expect(event.get("key3")).to eq("https://site/?g={......\"...; CLR rv:11.0)\"..}")
896
+ expect(event.get("key4")).to eq("val4")
897
+ end
898
+ end
899
+
900
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-kv
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.3
4
+ version: 4.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-11-07 00:00:00.000000000 Z
11
+ date: 2018-02-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -84,7 +84,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
84
84
  version: '0'
85
85
  requirements: []
86
86
  rubyforge_project:
87
- rubygems_version: 2.6.11
87
+ rubygems_version: 2.6.13
88
88
  signing_key:
89
89
  specification_version: 4
90
90
  summary: Parses key-value pairs