logstash-filter-kv 4.0.3 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3020b8c8f8399dfaaf9533b83ed66af6f90ab9843476d45e934feba3f48acea9
4
- data.tar.gz: f5bde51c1f94ef0c376d4c1f9754eaca9da89daca9171326e16e2cd11db25fbe
3
+ metadata.gz: db1a1524254aafe6af64400a781c90fe02a0e82bcc037d8ce43b652de30dea93
4
+ data.tar.gz: 36443b889dfde58914512466fe3eadb703fca7c30b665a3d02106f5c6b91cada
5
5
  SHA512:
6
- metadata.gz: 40b0548bdf2a9337529f9078fb330582a98860d35672fcd826f6e2d7356b8f0add71186e7ae98022441260f9788bf727318263dfdd649761ea9ec1a4e2e9b2eb
7
- data.tar.gz: 35f1bc9f1af780f015946a7101392b7dc0a8c72c5a28e45edff46aa2cc40dab1c3b7d590b7c71c45d354a5c0814034e116411392432d73f7b3fbfa6f7e12aed9
6
+ metadata.gz: 072403211a806a928e240df180ab2180257b22579db3dd9de9d3ec4065f0e9b75374db3ade99c7e09ad0fc5b66f09701026ee97f81ec3a76c98826b50bd33e94
7
+ data.tar.gz: 8c7dd7066d669c4f0fea15942f14fc2c80a9412190e31bb0b36cb7382843b3f8839b843b89c72ba930eb04e697e7e5536b01603c78c0666db2b81a49948a0d3d
@@ -1,3 +1,6 @@
1
+ ## 4.1.0
2
+ - feature: add option to split fields and values using a regex pattern (#55)
3
+
1
4
  ## 4.0.3
2
5
  - Update gemspec summary
3
6
 
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2012–2016 Elasticsearch <http://www.elastic.co>
1
+ Copyright (c) 2012-2018 Elasticsearch <http://www.elastic.co>
2
2
 
3
3
  Licensed under the Apache License, Version 2.0 (the "License");
4
4
  you may not use this file except in compliance with the License.
@@ -135,7 +135,7 @@ To exclude `from` and `to`, but retain the `foo` key, you could use this configu
135
135
  * Value type is <<string,string>>
136
136
  * Default value is `" "`
137
137
 
138
- A string of characters to use as delimiters for parsing out key-value pairs.
138
+ A string of characters to use as single-character field delimiters for parsing out key-value pairs.
139
139
 
140
140
  These characters form a regex character class and thus you must escape special regex
141
141
  characters like `[` or `]` using `\`.
@@ -160,6 +160,29 @@ fields:
160
160
  * `oq: bobo`
161
161
  * `ss: 12345`
162
162
 
163
+ [id="plugins-{type}s-{plugin}-field_split_pattern"]
164
+ ===== `field_split_pattern`
165
+
166
+ * Value type is <<string,string>>
167
+ * There is no default value for this setting.
168
+
169
+ A regular expression to use as the field delimiter for parsing out key-value pairs.
170
+ Useful to define multi-character field delimiters.
171
+ Setting the `field_split_pattern` option will take precedence over the `field_split` option.
172
+
173
+ Note that you should avoid using captured groups in your regex and you should be
174
+ cautious with lookaheads or lookbehinds and positional anchors.
175
+
176
+ For example, to split fields on a repetition of one or more colons
177
+ `k1=v1:k2=v2::k3=v3:::k4=v4`:
178
+ [source,ruby]
179
+ filter { kv { field_split_pattern => ":+" } }
180
+
181
+ To split fields on a regex character that needs escaping, like the plus sign
182
+ `k1=v1++k2=v2++k3=v3++k4=v4`:
183
+ [source,ruby]
184
+ filter { kv { field_split_pattern => "\\+\\+" } }
185
+
163
186
  [id="plugins-{type}s-{plugin}-include_brackets"]
164
187
  ===== `include_brackets`
165
188
 
@@ -393,7 +416,7 @@ For example, to trim `<`, `>`, `[`, `]` and `,` characters from values:
393
416
  * Value type is <<string,string>>
394
417
  * Default value is `"="`
395
418
 
396
- A non-empty string of characters to use as delimiters for identifying key-value relations.
419
+ A non-empty string of characters to use as single-character value delimiters for parsing out key-value pairs.
397
420
 
398
421
  These characters form a regex character class and thus you must escape special regex
399
422
  characters like `[` or `]` using `\`.
@@ -404,6 +427,21 @@ For example, to identify key-values such as
404
427
  filter { kv { value_split => ":" } }
405
428
 
406
429
 
430
+ [id="plugins-{type}s-{plugin}-value_split_pattern"]
431
+ ===== `value_split_pattern`
432
+
433
+ * Value type is <<string,string>>
434
+ * There is no default value for this setting.
435
+
436
+ A regular expression to use as the value delimiter for parsing out key-value pairs.
437
+ Useful to define multi-character value delimiters.
438
+ Setting the `value_split_pattern` option will take precedence over the `value_split` option.
439
+
440
+ Note that you should avoid using captured groups in your regex and you should be
441
+ cautious with lookaheads or lookbehinds and positional anchors.
442
+
443
+ See `field_split_pattern` for examples.
444
+
407
445
 
408
446
  [id="plugins-{type}s-{plugin}-common-options"]
409
447
  include::{include_path}/{type}.asciidoc[]
@@ -123,7 +123,7 @@ class LogStash::Filters::KV < LogStash::Filters::Base
123
123
  # }
124
124
  config :transform_key, :validate => [TRANSFORM_LOWERCASE_KEY, TRANSFORM_UPPERCASE_KEY, TRANSFORM_CAPITALIZE_KEY]
125
125
 
126
- # A string of characters to use as delimiters for parsing out key-value pairs.
126
+ # A string of characters to use as single-character field delimiters for parsing out key-value pairs.
127
127
  #
128
128
  # These characters form a regex character class and thus you must escape special regex
129
129
  # characters like `[` or `]` using `\`.
@@ -149,8 +149,25 @@ class LogStash::Filters::KV < LogStash::Filters::Base
149
149
  # * `ss: 12345`
150
150
  config :field_split, :validate => :string, :default => ' '
151
151
 
152
+ # A regular expression to use as the field delimiter for parsing out key-value pairs.
153
+ # Useful to define multi-character field delimiters.
154
+ # Setting the field_split_pattern option will take precedence over the field_split option.
155
+ #
156
+ # Note that you should avoid using captured groups in your regex and you should be
157
+ # cautious with lookaheads or lookbehinds and positional anchors.
158
+ #
159
+ # For example, to split fields on a repetition of one or more colons
160
+ # `k1=v1:k2=v2::k3=v3:::k4=v4`:
161
+ # [source,ruby]
162
+ # filter { kv { field_split_pattern => ":+" } }
163
+ #
164
+ # To split fields on a regex character that needs escaping, like the plus sign
165
+ # `k1=v1++k2=v2++k3=v3++k4=v4`:
166
+ # [source,ruby]
167
+ # filter { kv { field_split_pattern => "\\+\\+" } }
168
+ config :field_split_pattern, :validate => :string
152
169
 
153
- # A non-empty string of characters to use as delimiters for identifying key-value relations.
170
+ # A non-empty string of characters to use as single-character value delimiters for parsing out key-value pairs.
154
171
  #
155
172
  # These characters form a regex character class and thus you must escape special regex
156
173
  # characters like `[` or `]` using `\`.
@@ -161,6 +178,16 @@ class LogStash::Filters::KV < LogStash::Filters::Base
161
178
  # filter { kv { value_split => ":" } }
162
179
  config :value_split, :validate => :string, :default => '='
163
180
 
181
+ # A regular expression to use as the value delimiter for parsing out key-value pairs.
182
+ # Useful to define multi-character value delimiters.
183
+ # Setting the value_split_pattern option will take precedence over the value_split option.
184
+ #
185
+ # Note that you should avoid using captured groups in your regex and you should be
186
+ # cautious with lookaheads or lookbehinds and positional anchors.
187
+ #
188
+ # See field_split_pattern for examples.
189
+ config :value_split_pattern, :validate => :string
190
+
164
191
  # A string to prepend to all of the extracted keys.
165
192
  #
166
193
  # For example, to prepend arg_ to all keys:
@@ -279,24 +306,70 @@ class LogStash::Filters::KV < LogStash::Filters::Base
279
306
  def register
280
307
  if @value_split.empty?
281
308
  raise LogStash::ConfigurationError, I18n.t(
282
- "logstash.agent.configuration.invalid_plugin_register",
309
+ "logstash.runner.configuration.invalid_plugin_register",
283
310
  :plugin => "filter",
284
311
  :type => "kv",
285
312
  :error => "Configuration option 'value_split' must be a non-empty string"
286
313
  )
287
314
  end
288
315
 
316
+ if @field_split_pattern && @field_split_pattern.empty?
317
+ raise LogStash::ConfigurationError, I18n.t(
318
+ "logstash.runner.configuration.invalid_plugin_register",
319
+ :plugin => "filter",
320
+ :type => "kv",
321
+ :error => "Configuration option 'field_split_pattern' must be a non-empty string"
322
+ )
323
+ end
324
+
325
+ if @value_split_pattern && @value_split_pattern.empty?
326
+ raise LogStash::ConfigurationError, I18n.t(
327
+ "logstash.runner.configuration.invalid_plugin_register",
328
+ :plugin => "filter",
329
+ :type => "kv",
330
+ :error => "Configuration option 'value_split_pattern' must be a non-empty string"
331
+ )
332
+ end
333
+
289
334
  @trim_value_re = Regexp.new("^[#{@trim_value}]|[#{@trim_value}]$") if @trim_value
290
335
  @trim_key_re = Regexp.new("^[#{@trim_key}]|[#{@trim_key}]$") if @trim_key
291
336
 
292
337
  @remove_char_value_re = Regexp.new("[#{@remove_char_value}]") if @remove_char_value
293
338
  @remove_char_key_re = Regexp.new("[#{@remove_char_key}]") if @remove_char_key
294
339
 
295
- valueRxString = "(?:\"([^\"]+)\"|'([^']+)'"
296
- valueRxString += "|\\(([^\\)]+)\\)|\\[([^\\]]+)\\]|<([^>]+)>" if @include_brackets
297
- valueRxString += "|((?:\\\\ |[^" + @field_split + "])+))"
298
- @scan_re = Regexp.new("((?:\\\\ |[^" + @field_split + @value_split + "])+)\s*[" + @value_split + "]\s*" + valueRxString)
299
- @value_split_re = /[#{@value_split}]/
340
+ field_split = Regexp::compile(@field_split_pattern || /[#{@field_split}]/)
341
+ value_split = Regexp::compile(@value_split_pattern || /[#{@value_split}]/)
342
+
343
+ optional_whitespace = /\s*/
344
+ eof = /$/
345
+
346
+ value_pattern = begin
347
+ # each component expression within value_pattern _must_ capture exactly once.
348
+ value_patterns = []
349
+
350
+ value_patterns << quoted_capture(%q(")) # quoted double
351
+ value_patterns << quoted_capture(%q(')) # quoted single
352
+ if @include_brackets
353
+ value_patterns << quoted_capture('(', ')') # bracketed paren
354
+ value_patterns << quoted_capture('[', ']') # bracketed square
355
+ value_patterns << quoted_capture('<', '>') # bracketed angle
356
+ end
357
+
358
+ # an unquoted value is a _captured_ sequence of characters or escaped spaces before a `field_split` or EOF.
359
+ value_patterns << /((?:\\ |.)+?)(?=#{Regexp::union(field_split, eof)})/
360
+
361
+ Regexp.union(*value_patterns)
362
+ end
363
+
364
+
365
+ # a key is a _captured_ sequence of characters or escaped spaces before optional whitespace
366
+ # and followed by either a `value_split`, a `field_split`, or EOF.
367
+ key_pattern = /((?:\\ |.)+?)(?=#{optional_whitespace}#{Regexp::union(value_split, field_split, eof)})/
368
+
369
+ @scan_re = /#{field_split}?#{key_pattern}#{optional_whitespace}(?:#{value_split}#{optional_whitespace}#{value_pattern})?(?=#{Regexp::union(field_split, eof)})/
370
+ @value_split_re = value_split
371
+
372
+ @logger.debug? && @logger.debug("KV scan regex", :regex => @scan_re.inspect)
300
373
  end
301
374
 
302
375
  def filter(event)
@@ -335,6 +408,26 @@ class LogStash::Filters::KV < LogStash::Filters::Base
335
408
  s =~ @value_split_re
336
409
  end
337
410
 
411
+ # Helper function for generating single-capture `Regexp` that, when matching a string bound by the given quotes
412
+ # or brackets, will capture the content that is between the quotes or brackets.
413
+ #
414
+ # @api private
415
+ # @param quote_sequence [String] a character sequence that begins a quoted expression
416
+ # @param close_quote_sequence [String] a character sequence that ends a quoted expression; (default: quote_sequence)
417
+ # @return [Regexp] with a single capture group representing content that is between the given quotes
418
+ def quoted_capture(quote_sequence, close_quote_sequence=quote_sequence)
419
+ fail('quote_sequence must be non-empty!') if quote_sequence.nil? || quote_sequence.empty?
420
+ fail('close_quote_sequence must be non-empty!') if close_quote_sequence.nil? || close_quote_sequence.empty?
421
+
422
+ open_pattern = /#{Regexp.quote(quote_sequence)}/
423
+ close_pattern = /#{Regexp.quote(close_quote_sequence)}/
424
+
425
+ # matches a sequence of zero or more characters that is followed by the `close_quote_sequence`
426
+ quoted_value_pattern = /(?:.)*?(?=#{Regexp.quote(close_quote_sequence)})/
427
+
428
+ /#{open_pattern}(#{quoted_value_pattern})#{close_pattern}/
429
+ end
430
+
338
431
  def transform(text, method)
339
432
  case method
340
433
  when TRANSFORM_LOWERCASE_KEY
@@ -354,8 +447,10 @@ class LogStash::Filters::KV < LogStash::Filters::Base
354
447
  include_keys = @include_keys.map{|key| event.sprintf(key)}
355
448
  exclude_keys = @exclude_keys.map{|key| event.sprintf(key)}
356
449
 
357
- text.scan(@scan_re) do |key, v1, v2, v3, v4, v5, v6|
358
- value = v1 || v2 || v3 || v4 || v5 || v6
450
+ text.scan(@scan_re) do |key, *value_candidates|
451
+ value = value_candidates.compact.first
452
+ next if value.nil? || value.empty?
453
+
359
454
  key = @trim_key ? key.gsub(@trim_key_re, "") : key
360
455
  key = @remove_char_key ? key.gsub(@remove_char_key_re, "") : key
361
456
  key = @transform_key ? transform(key, @transform_key) : key
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-kv'
4
- s.version = '4.0.3'
4
+ s.version = '4.1.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Parses key-value pairs"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -772,3 +772,129 @@ describe LogStash::Filters::KV do
772
772
  end
773
773
  end
774
774
  end
775
+
776
+ describe "multi character splitting" do
777
+ subject do
778
+ plugin = LogStash::Filters::KV.new(options)
779
+ plugin.register
780
+ plugin
781
+ end
782
+
783
+ let(:data) { {"message" => message} }
784
+ let(:event) { LogStash::Event.new(data) }
785
+
786
+ shared_examples "parsing all fields and values" do
787
+ it "parses all fields and values" do
788
+ subject.filter(event)
789
+ expect(event.get("hello")).to eq("world")
790
+ expect(event.get("foo")).to eq("bar")
791
+ expect(event.get("baz")).to eq("fizz")
792
+ expect(event.get("doublequoted")).to eq("hello world")
793
+ expect(event.get("singlequoted")).to eq("hello world")
794
+ expect(event.get("bracketsone")).to eq("hello world")
795
+ expect(event.get("bracketstwo")).to eq("hello world")
796
+ expect(event.get("bracketsthree")).to eq("hello world")
797
+ end
798
+ end
799
+
800
+ context "empty value_split_pattern" do
801
+ let(:options) { { "value_split_pattern" => "" } }
802
+ it "should raise ConfigurationError" do
803
+ expect{subject}.to raise_error(LogStash::ConfigurationError)
804
+ end
805
+ end
806
+
807
+ context "empty field_split_pattern" do
808
+ let(:options) { { "field_split_pattern" => "" } }
809
+ it "should raise ConfigurationError" do
810
+ expect{subject}.to raise_error(LogStash::ConfigurationError)
811
+ end
812
+ end
813
+
814
+ context "single split" do
815
+ let(:message) { "hello:world foo:bar baz:fizz doublequoted:\"hello world\" singlequoted:'hello world' bracketsone:(hello world) bracketstwo:[hello world] bracketsthree:<hello world>" }
816
+ let(:options) {
817
+ {
818
+ "field_split" => " ",
819
+ "value_split" => ":",
820
+ }
821
+ }
822
+ it_behaves_like "parsing all fields and values"
823
+ end
824
+
825
+ context "value split multi" do
826
+ let(:message) { "hello::world foo::bar baz::fizz doublequoted::\"hello world\" singlequoted::'hello world' bracketsone::(hello world) bracketstwo::[hello world] bracketsthree::<hello world>" }
827
+ let(:options) {
828
+ {
829
+ "field_split" => " ",
830
+ "value_split_pattern" => "::",
831
+ }
832
+ }
833
+ it_behaves_like "parsing all fields and values"
834
+ end
835
+
836
+ context "field and value split multi" do
837
+ let(:message) { "hello::world__foo::bar__baz::fizz__doublequoted::\"hello world\"__singlequoted::'hello world'__bracketsone::(hello world)__bracketstwo::[hello world]__bracketsthree::<hello world>" }
838
+ let(:options) {
839
+ {
840
+ "field_split_pattern" => "__",
841
+ "value_split_pattern" => "::",
842
+ }
843
+ }
844
+ it_behaves_like "parsing all fields and values"
845
+ end
846
+
847
+ context "field and value split multi with regex" do
848
+ let(:message) { "hello:world_foo::bar__baz:::fizz___doublequoted:::\"hello world\"____singlequoted:::::'hello world'____bracketsone:::(hello world)__bracketstwo:[hello world]_bracketsthree::::::<hello world>" }
849
+ let(:options) {
850
+ {
851
+ "field_split_pattern" => "_+",
852
+ "value_split_pattern" => ":+",
853
+ }
854
+ }
855
+ it_behaves_like "parsing all fields and values"
856
+ end
857
+
858
+ context "field and value split multi using singe char" do
859
+ let(:message) { "hello:world foo:bar baz:fizz doublequoted:\"hello world\" singlequoted:'hello world' bracketsone:(hello world) bracketstwo:[hello world] bracketsthree:<hello world>" }
860
+ let(:options) {
861
+ {
862
+ "field_split_pattern" => " ",
863
+ "value_split_pattern" => ":",
864
+ }
865
+ }
866
+ it_behaves_like "parsing all fields and values"
867
+ end
868
+
869
+ context "field and value split multi using escaping" do
870
+ let(:message) { "hello++world??foo++bar??baz++fizz??doublequoted++\"hello world\"??singlequoted++'hello world'??bracketsone++(hello world)??bracketstwo++[hello world]??bracketsthree++<hello world>" }
871
+ let(:options) {
872
+ {
873
+ "field_split_pattern" => "\\?\\?",
874
+ "value_split_pattern" => "\\+\\+",
875
+ }
876
+ }
877
+ it_behaves_like "parsing all fields and values"
878
+ end
879
+
880
+
881
+ context "example from @guyboertje in #15" do
882
+ let(:message) { 'key1: val1; key2: val2; key3: https://site/?g={......"...; CLR rv:11.0)"..}; key4: val4;' }
883
+ let(:options) {
884
+ {
885
+ "field_split_pattern" => ";\s*(?=key.+?:)|;$",
886
+ "value_split_pattern" => ":\s+",
887
+ }
888
+ }
889
+
890
+ it "parses all fields and values" do
891
+ subject.filter(event)
892
+
893
+ expect(event.get("key1")).to eq("val1")
894
+ expect(event.get("key2")).to eq("val2")
895
+ expect(event.get("key3")).to eq("https://site/?g={......\"...; CLR rv:11.0)\"..}")
896
+ expect(event.get("key4")).to eq("val4")
897
+ end
898
+ end
899
+
900
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-kv
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.3
4
+ version: 4.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-11-07 00:00:00.000000000 Z
11
+ date: 2018-02-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -84,7 +84,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
84
84
  version: '0'
85
85
  requirements: []
86
86
  rubyforge_project:
87
- rubygems_version: 2.6.11
87
+ rubygems_version: 2.6.13
88
88
  signing_key:
89
89
  specification_version: 4
90
90
  summary: Parses key-value pairs