logstash-filter-grok 4.1.1 → 4.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a6b0f1fa69a37ff0ac0c1d44d599c1d09abb706f28b12ed8bdf0299ef82eb461
4
- data.tar.gz: c6b6941aa1c16a5594ec7f30b08eaec2b5e1ddc7e2e1e41c541248520d2be852
3
+ metadata.gz: 9a139ac82c3147d778c2f4510cf6ab077c6c4e73adeae87a0b4058623c4a0619
4
+ data.tar.gz: 02300cea3c6a17e10947cf63a7dae596c1ef1fc1c5114f3834ac784536a2941a
5
5
  SHA512:
6
- metadata.gz: f0917c22df8a3f0f14b684e232f5b1eb9213aa36f5450ac4e28eb3ac547bac510d3d6baf2134a77fc181fdb2f94d063b91a1cba90982f25cdceaaffe9a2b377b
7
- data.tar.gz: 03c469b35434026dd05651b624122020c8f98ef4a2cf7a156a6ab03aee704575e18c3540ec5c36d6f5235609e6ee3551168e094bded7120860326b339703a97e
6
+ metadata.gz: e822b6f1aca31141d7c3d553167a27ea1c11ba41e39dd9553fdbecfa3472780ccca75054b2247985261634efb3263a62fb7ca87f5762e418f744ef45bf994889
7
+ data.tar.gz: 990df0541f9a5cdb09d6fea2060c1c68e901e7cdf1cdb0fd45cfcf32dfa0710da3eeddd7fb3400441489ccc1741f0380daf5934b15e0fbba9ccc2d4a6b370c37
@@ -1,3 +1,6 @@
1
+ ## 4.2.0
2
+ - Added: support for timeout_scope [#153](https://github.com/logstash-plugins/logstash-filter-grok/pull/153)
3
+
1
4
  ## 4.1.1
2
5
  - Fix formatting for code sample [#148](https://github.com/logstash-plugins/logstash-filter-grok/pull/148)
3
6
 
@@ -195,6 +195,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
195
195
  | <<plugins-{type}s-{plugin}-tag_on_failure>> |<<array,array>>|No
196
196
  | <<plugins-{type}s-{plugin}-tag_on_timeout>> |<<string,string>>|No
197
197
  | <<plugins-{type}s-{plugin}-timeout_millis>> |<<number,number>>|No
198
+ | <<plugins-{type}s-{plugin}-timeout_scope>> |<<string,string>>|No
198
199
  |=======================================================================
199
200
 
200
201
  Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
@@ -356,6 +357,22 @@ This will never timeout early, but may take a little longer to timeout.
356
357
  Actual timeout is approximate based on a 250ms quantization.
357
358
  Set to 0 to disable timeouts
358
359
 
360
+ [id="plugins-{type}s-{plugin}-timeout_scope"]
361
+ ===== `timeout_scope`
362
+
363
+ * Value type is <<string,string>>
364
+ * Default value is `"pattern"`
365
+ * Supported values are `"pattern"` and `"event"`
366
+
367
+ When multiple patterns are provided to <<plugins-{type}s-{plugin}-match>>,
368
+ the timeout has historically applied to _each_ pattern, incurring overhead
369
+ for each and every pattern that is attempted; when the grok filter is
370
+ configured with `timeout_scope => "event"`, the plugin instead enforces
371
+ a single timeout across all attempted matches on the event, so it can
372
+ achieve a similar safeguard against runaway matchers with significantly
373
+ less overhead.
374
+
375
+ It's usually better to scope the timeout for the whole event.
359
376
 
360
377
 
361
378
  [id="plugins-{type}s-{plugin}-common-options"]
@@ -140,7 +140,7 @@
140
140
  # `SYSLOGBASE` pattern which itself is defined by other patterns.
141
141
  #
142
142
  # Another option is to define patterns _inline_ in the filter using `pattern_definitions`.
143
- # This is mostly for convenience and allows user to define a pattern which can be used just in that
143
+ # This is mostly for convenience and allows user to define a pattern which can be used just in that
144
144
  # filter. These newly defined patterns in `pattern_definitions` will not be available outside of that particular `grok` filter.
145
145
  #
146
146
  class LogStash::Filters::Grok < LogStash::Filters::Base
@@ -168,7 +168,7 @@
168
168
  # necessarily need to define this yourself unless you are adding additional
169
169
  # patterns. You can point to multiple pattern directories using this setting.
170
170
  # Note that Grok will read all files in the directory matching the patterns_files_glob
171
- # and assume it's a pattern file (including any tilde backup files).
171
+ # and assume it's a pattern file (including any tilde backup files).
172
172
  # [source,ruby]
173
173
  # patterns_dir => ["/opt/logstash/patterns", "/opt/logstash/extra_patterns"]
174
174
  #
@@ -215,6 +215,16 @@
215
215
  # Set to 0 to disable timeouts
216
216
  config :timeout_millis, :validate => :number, :default => 30000
217
217
 
218
+ # When multiple patterns are provided to `match`,
219
+ # the timeout has historically applied to _each_ pattern, incurring overhead
220
+ # for each and every pattern that is attempted; when the grok filter is
221
+ # configured with `timeout_scope => 'event'`, the plugin instead enforces
222
+ # a single timeout across all attempted matches on the event, so it can
223
+ # achieve a similar safeguard against runaway matchers with significantly
224
+ # less overhead.
225
+ # It's usually better to scope the timeout for the whole event.
226
+ config :timeout_scope, :validate => %w(pattern event), :default => "pattern"
227
+
218
228
  # Tag to apply if a grok regexp times out.
219
229
  config :tag_on_timeout, :validate => :string, :default => '_groktimeout'
220
230
 
@@ -278,10 +288,8 @@
278
288
  @match_counter = metric.counter(:matches)
279
289
  @failure_counter = metric.counter(:failures)
280
290
 
281
- # divide by float to allow fractionnal seconds, the Timeout class timeout value is in seconds but the underlying
282
- # executor resolution is in microseconds so fractionnal second parameter down to microseconds is possible.
283
- # see https://github.com/jruby/jruby/blob/9.2.7.0/core/src/main/java/org/jruby/ext/timeout/Timeout.java#L125
284
- @timeout_seconds = @timeout_millis / 1000.0
291
+ @timeout = @timeout_millis > 0.0 ? RubyTimeout.new(@timeout_millis) : NoopTimeout::INSTANCE
292
+ @matcher = ( @timeout_scope.eql?('event') ? EventTimeoutMatcher : PatternTimeoutMatcher ).new(self)
285
293
  end # def register
286
294
 
287
295
  def filter(event)
@@ -334,25 +342,56 @@
334
342
  end
335
343
 
336
344
  def match_against_groks(groks, field, input, event)
337
- input = input.to_s
338
- matched = false
339
- groks.each do |grok|
340
- # Convert anything else to string (number, hash, etc)
341
- matched = grok_till_timeout(grok, field, input)
342
- if matched
343
- grok.capture(matched) {|field, value| handle(field, value, event)}
344
- break if @break_on_match
345
+ # Convert anything else to string (number, hash, etc)
346
+ context = GrokContext.new(field, input.to_s)
347
+ @matcher.match(context, groks, event, @break_on_match)
348
+ end
349
+
350
+ # Internal (base) helper to handle the global timeout switch.
351
+ # @private
352
+ class Matcher
353
+
354
+ def initialize(filter)
355
+ @filter = filter
356
+ end
357
+
358
+ def match(context, groks, event, break_on_match)
359
+ matched = false
360
+
361
+ groks.each do |grok|
362
+ context.set_grok(grok)
363
+
364
+ matched = execute(context, grok)
365
+ if matched
366
+ grok.capture(matched) { |field, value| @filter.handle(field, value, event) }
367
+ break if break_on_match
368
+ end
345
369
  end
370
+
371
+ matched
372
+ end
373
+
374
+ protected
375
+
376
+ def execute(context, grok)
377
+ grok.execute(context.input)
346
378
  end
347
-
348
- matched
379
+
349
380
  end
350
381
 
351
- def grok_till_timeout(grok, field, value)
352
- begin
353
- @timeout_seconds > 0.0 ? Timeout.timeout(@timeout_seconds, TimeoutError) { grok.execute(value) } : grok.execute(value)
354
- rescue TimeoutError
355
- raise GrokTimeoutException.new(grok, field, value)
382
+ # @private
383
+ class EventTimeoutMatcher < Matcher
384
+ # @override
385
+ def match(context, groks, event, break_on_match)
386
+ @filter.with_timeout(context) { super }
387
+ end
388
+ end
389
+
390
+ # @private
391
+ class PatternTimeoutMatcher < Matcher
392
+ # @override
393
+ def execute(context, grok)
394
+ @filter.with_timeout(context) { super }
356
395
  end
357
396
  end
358
397
 
@@ -378,6 +417,7 @@
378
417
  end
379
418
  end
380
419
  end
420
+ public :handle
381
421
 
382
422
  def patterns_files_from_paths(paths, glob)
383
423
  patternfiles = []
@@ -438,4 +478,52 @@
438
478
  end
439
479
  end
440
480
  end
481
+
482
+ def with_timeout(context, &block)
483
+ @timeout.exec(&block)
484
+ rescue TimeoutError => error
485
+ handle_timeout(context, error)
486
+ end
487
+ public :with_timeout
488
+
489
+ def handle_timeout(context, error)
490
+ raise GrokTimeoutException.new(context.grok, context.field, context.input)
491
+ end
492
+
493
+ # @private
494
+ class GrokContext
495
+ attr_reader :grok, :field, :input
496
+
497
+ def initialize(field, input)
498
+ @field = field
499
+ @input = input
500
+ end
501
+
502
+ def set_grok(grok)
503
+ @grok = grok
504
+ end
505
+ end
506
+
507
+ # @private
508
+ class NoopTimeout
509
+ INSTANCE = new
510
+
511
+ def exec
512
+ yield
513
+ end
514
+ end
515
+
516
+ # @private
517
+ class RubyTimeout
518
+ def initialize(timeout_millis)
519
+ # divide by float to allow fractional seconds, the Timeout class timeout value is in seconds but the underlying
520
+ # executor resolution is in microseconds so fractional second parameter down to microseconds is possible.
521
+ # see https://github.com/jruby/jruby/blob/9.2.7.0/core/src/main/java/org/jruby/ext/timeout/Timeout.java#L125
522
+ @timeout_seconds = timeout_millis / 1000.0
523
+ end
524
+
525
+ def exec(&block)
526
+ Timeout.timeout(@timeout_seconds, TimeoutError, &block)
527
+ end
528
+ end
441
529
  end # class LogStash::Filters::Grok
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-grok'
4
- s.version = '4.1.1'
4
+ s.version = '4.2.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Parses unstructured event data into fields"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -199,7 +199,7 @@ describe LogStash::Filters::Grok do
199
199
 
200
200
  sample "400 454.33" do
201
201
  insist { subject.get("foo") } == 400
202
- insist { subject.get("foo") }.is_a?(Fixnum)
202
+ insist { subject.get("foo") }.is_a?(Integer)
203
203
  insist { subject.get("bar") } == 454.33
204
204
  insist { subject.get("bar") }.is_a?(Float)
205
205
  end
@@ -412,7 +412,7 @@ describe LogStash::Filters::Grok do
412
412
  filter {
413
413
  grok {
414
414
  match => {
415
- message => "(.*a){30}"
415
+ "message" => "(.*a){30}"
416
416
  }
417
417
  timeout_millis => 100
418
418
  }
@@ -425,6 +425,51 @@ describe LogStash::Filters::Grok do
425
425
  end
426
426
  end
427
427
 
428
+ describe "no timeout on failure with multiple patterns (when timeout not grouped)" do
429
+ config <<-CONFIG
430
+ filter {
431
+ grok {
432
+ match => {
433
+ "message" => [
434
+ "(.*f){20}", "(.*e){20}", "(.*d){20}", "(.*c){20}", "(.*b){20}",
435
+ "(.*a){25}", "(.*a){24}", "(.*a){23}", "(.*a){22}", "(.*a){21}",
436
+ "(.*a){20}"
437
+ ]
438
+ }
439
+ timeout_millis => 500
440
+ timeout_scope => 'pattern'
441
+ }
442
+ }
443
+ CONFIG
444
+
445
+ sample( 'b' * 10 + 'c' * 10 + 'd' * 10 + 'e' * 10 + ' ' + 'a' * 20 ) do
446
+ insist { subject.get("tags") }.nil?
447
+ end
448
+ end
449
+
450
+ describe "timeout on grouped (multi-pattern) failure" do
451
+ config <<-CONFIG
452
+ filter {
453
+ grok {
454
+ match => {
455
+ "message" => [
456
+ "(.*f){20}", "(.*e){20}", "(.*d){20}", "(.*c){20}", "(.*b){20}",
457
+ "(.*a){25}", "(.*a){24}", "(.*a){23}", "(.*a){22}", "(.*a){21}",
458
+ "(.*a){20}"
459
+ ]
460
+ }
461
+ timeout_millis => 500
462
+ timeout_scope => 'event'
463
+ }
464
+ }
465
+ CONFIG
466
+
467
+ sample( 'b' * 10 + 'c' * 10 + 'd' * 10 + 'e' * 10 + ' ' + 'a' * 20 ) do
468
+ expect(subject.get("tags")).to include("_groktimeout")
469
+ expect(subject.get("tags")).not_to include("_grokparsefailure")
470
+ end
471
+ end
472
+
428
473
  describe "tagging on failure" do
429
474
  config <<-CONFIG
430
475
  filter {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-grok
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.1.1
4
+ version: 4.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-08 00:00:00.000000000 Z
11
+ date: 2019-11-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement