logstash-filter-grok 4.1.1 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a6b0f1fa69a37ff0ac0c1d44d599c1d09abb706f28b12ed8bdf0299ef82eb461
4
- data.tar.gz: c6b6941aa1c16a5594ec7f30b08eaec2b5e1ddc7e2e1e41c541248520d2be852
3
+ metadata.gz: 9a139ac82c3147d778c2f4510cf6ab077c6c4e73adeae87a0b4058623c4a0619
4
+ data.tar.gz: 02300cea3c6a17e10947cf63a7dae596c1ef1fc1c5114f3834ac784536a2941a
5
5
  SHA512:
6
- metadata.gz: f0917c22df8a3f0f14b684e232f5b1eb9213aa36f5450ac4e28eb3ac547bac510d3d6baf2134a77fc181fdb2f94d063b91a1cba90982f25cdceaaffe9a2b377b
7
- data.tar.gz: 03c469b35434026dd05651b624122020c8f98ef4a2cf7a156a6ab03aee704575e18c3540ec5c36d6f5235609e6ee3551168e094bded7120860326b339703a97e
6
+ metadata.gz: e822b6f1aca31141d7c3d553167a27ea1c11ba41e39dd9553fdbecfa3472780ccca75054b2247985261634efb3263a62fb7ca87f5762e418f744ef45bf994889
7
+ data.tar.gz: 990df0541f9a5cdb09d6fea2060c1c68e901e7cdf1cdb0fd45cfcf32dfa0710da3eeddd7fb3400441489ccc1741f0380daf5934b15e0fbba9ccc2d4a6b370c37
@@ -1,3 +1,6 @@
1
+ ## 4.2.0
2
+ - Added: support for timeout_scope [#153](https://github.com/logstash-plugins/logstash-filter-grok/pull/153)
3
+
1
4
  ## 4.1.1
2
5
  - Fix formatting for code sample [#148](https://github.com/logstash-plugins/logstash-filter-grok/pull/148)
3
6
 
@@ -195,6 +195,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
195
195
  | <<plugins-{type}s-{plugin}-tag_on_failure>> |<<array,array>>|No
196
196
  | <<plugins-{type}s-{plugin}-tag_on_timeout>> |<<string,string>>|No
197
197
  | <<plugins-{type}s-{plugin}-timeout_millis>> |<<number,number>>|No
198
+ | <<plugins-{type}s-{plugin}-timeout_scope>> |<<string,string>>|No
198
199
  |=======================================================================
199
200
 
200
201
  Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
@@ -356,6 +357,22 @@ This will never timeout early, but may take a little longer to timeout.
356
357
  Actual timeout is approximate based on a 250ms quantization.
357
358
  Set to 0 to disable timeouts
358
359
 
360
+ [id="plugins-{type}s-{plugin}-timeout_scope"]
361
+ ===== `timeout_scope`
362
+
363
+ * Value type is <<string,string>>
364
+ * Default value is `"pattern"`
365
+ * Supported values are `"pattern"` and `"event"`
366
+
367
+ When multiple patterns are provided to <<plugins-{type}s-{plugin}-match>>,
368
+ the timeout has historically applied to _each_ pattern, incurring overhead
369
+ for each and every pattern that is attempted; when the grok filter is
370
+ configured with `timeout_scope => event`, the plugin instead enforces
371
+ a single timeout across all attempted matches on the event, so it can
372
+ achieve a similar safeguard against runaway matchers with significantly
373
+ less overhead.
374
+
375
+ It's usually better to scope the timeout for the whole event.
359
376
 
360
377
 
361
378
  [id="plugins-{type}s-{plugin}-common-options"]
@@ -140,7 +140,7 @@
140
140
  # `SYSLOGBASE` pattern which itself is defined by other patterns.
141
141
  #
142
142
  # Another option is to define patterns _inline_ in the filter using `pattern_definitions`.
143
- # This is mostly for convenience and allows user to define a pattern which can be used just in that
143
+ # This is mostly for convenience and allows user to define a pattern which can be used just in that
144
144
  # filter. These newly defined patterns in `pattern_definitions` will not be available outside of that particular `grok` filter.
145
145
  #
146
146
  class LogStash::Filters::Grok < LogStash::Filters::Base
@@ -168,7 +168,7 @@
168
168
  # necessarily need to define this yourself unless you are adding additional
169
169
  # patterns. You can point to multiple pattern directories using this setting.
170
170
  # Note that Grok will read all files in the directory matching the patterns_files_glob
171
- # and assume it's a pattern file (including any tilde backup files).
171
+ # and assume it's a pattern file (including any tilde backup files).
172
172
  # [source,ruby]
173
173
  # patterns_dir => ["/opt/logstash/patterns", "/opt/logstash/extra_patterns"]
174
174
  #
@@ -215,6 +215,16 @@
215
215
  # Set to 0 to disable timeouts
216
216
  config :timeout_millis, :validate => :number, :default => 30000
217
217
 
218
+ # When multiple patterns are provided to `match`,
219
+ # the timeout has historically applied to _each_ pattern, incurring overhead
220
+ # for each and every pattern that is attempted; when the grok filter is
221
+ # configured with `timeout_scope => 'event'`, the plugin instead enforces
222
+ # a single timeout across all attempted matches on the event, so it can
223
+ # achieve a similar safeguard against runaway matchers with significantly
224
+ # less overhead.
225
+ # It's usually better to scope the timeout for the whole event.
226
+ config :timeout_scope, :validate => %w(pattern event), :default => "pattern"
227
+
218
228
  # Tag to apply if a grok regexp times out.
219
229
  config :tag_on_timeout, :validate => :string, :default => '_groktimeout'
220
230
 
@@ -278,10 +288,8 @@
278
288
  @match_counter = metric.counter(:matches)
279
289
  @failure_counter = metric.counter(:failures)
280
290
 
281
- # divide by float to allow fractionnal seconds, the Timeout class timeout value is in seconds but the underlying
282
- # executor resolution is in microseconds so fractionnal second parameter down to microseconds is possible.
283
- # see https://github.com/jruby/jruby/blob/9.2.7.0/core/src/main/java/org/jruby/ext/timeout/Timeout.java#L125
284
- @timeout_seconds = @timeout_millis / 1000.0
291
+ @timeout = @timeout_millis > 0.0 ? RubyTimeout.new(@timeout_millis) : NoopTimeout::INSTANCE
292
+ @matcher = ( @timeout_scope.eql?('event') ? EventTimeoutMatcher : PatternTimeoutMatcher ).new(self)
285
293
  end # def register
286
294
 
287
295
  def filter(event)
@@ -334,25 +342,56 @@
334
342
  end
335
343
 
336
344
  def match_against_groks(groks, field, input, event)
337
- input = input.to_s
338
- matched = false
339
- groks.each do |grok|
340
- # Convert anything else to string (number, hash, etc)
341
- matched = grok_till_timeout(grok, field, input)
342
- if matched
343
- grok.capture(matched) {|field, value| handle(field, value, event)}
344
- break if @break_on_match
345
+ # Convert anything else to string (number, hash, etc)
346
+ context = GrokContext.new(field, input.to_s)
347
+ @matcher.match(context, groks, event, @break_on_match)
348
+ end
349
+
350
+ # Internal (base) helper to handle the global timeout switch.
351
+ # @private
352
+ class Matcher
353
+
354
+ def initialize(filter)
355
+ @filter = filter
356
+ end
357
+
358
+ def match(context, groks, event, break_on_match)
359
+ matched = false
360
+
361
+ groks.each do |grok|
362
+ context.set_grok(grok)
363
+
364
+ matched = execute(context, grok)
365
+ if matched
366
+ grok.capture(matched) { |field, value| @filter.handle(field, value, event) }
367
+ break if break_on_match
368
+ end
345
369
  end
370
+
371
+ matched
372
+ end
373
+
374
+ protected
375
+
376
+ def execute(context, grok)
377
+ grok.execute(context.input)
346
378
  end
347
-
348
- matched
379
+
349
380
  end
350
381
 
351
- def grok_till_timeout(grok, field, value)
352
- begin
353
- @timeout_seconds > 0.0 ? Timeout.timeout(@timeout_seconds, TimeoutError) { grok.execute(value) } : grok.execute(value)
354
- rescue TimeoutError
355
- raise GrokTimeoutException.new(grok, field, value)
382
+ # @private
383
+ class EventTimeoutMatcher < Matcher
384
+ # @override
385
+ def match(context, groks, event, break_on_match)
386
+ @filter.with_timeout(context) { super }
387
+ end
388
+ end
389
+
390
+ # @private
391
+ class PatternTimeoutMatcher < Matcher
392
+ # @override
393
+ def execute(context, grok)
394
+ @filter.with_timeout(context) { super }
356
395
  end
357
396
  end
358
397
 
@@ -378,6 +417,7 @@
378
417
  end
379
418
  end
380
419
  end
420
+ public :handle
381
421
 
382
422
  def patterns_files_from_paths(paths, glob)
383
423
  patternfiles = []
@@ -438,4 +478,52 @@
438
478
  end
439
479
  end
440
480
  end
481
+
482
+ def with_timeout(context, &block)
483
+ @timeout.exec(&block)
484
+ rescue TimeoutError => error
485
+ handle_timeout(context, error)
486
+ end
487
+ public :with_timeout
488
+
489
+ def handle_timeout(context, error)
490
+ raise GrokTimeoutException.new(context.grok, context.field, context.input)
491
+ end
492
+
493
+ # @private
494
+ class GrokContext
495
+ attr_reader :grok, :field, :input
496
+
497
+ def initialize(field, input)
498
+ @field = field
499
+ @input = input
500
+ end
501
+
502
+ def set_grok(grok)
503
+ @grok = grok
504
+ end
505
+ end
506
+
507
+ # @private
508
+ class NoopTimeout
509
+ INSTANCE = new
510
+
511
+ def exec
512
+ yield
513
+ end
514
+ end
515
+
516
+ # @private
517
+ class RubyTimeout
518
+ def initialize(timeout_millis)
519
+ # divide by float to allow fractional seconds, the Timeout class timeout value is in seconds but the underlying
520
+ # executor resolution is in microseconds so fractional second parameter down to microseconds is possible.
521
+ # see https://github.com/jruby/jruby/blob/9.2.7.0/core/src/main/java/org/jruby/ext/timeout/Timeout.java#L125
522
+ @timeout_seconds = timeout_millis / 1000.0
523
+ end
524
+
525
+ def exec(&block)
526
+ Timeout.timeout(@timeout_seconds, TimeoutError, &block)
527
+ end
528
+ end
441
529
  end # class LogStash::Filters::Grok
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-grok'
4
- s.version = '4.1.1'
4
+ s.version = '4.2.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Parses unstructured event data into fields"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -199,7 +199,7 @@ describe LogStash::Filters::Grok do
199
199
 
200
200
  sample "400 454.33" do
201
201
  insist { subject.get("foo") } == 400
202
- insist { subject.get("foo") }.is_a?(Fixnum)
202
+ insist { subject.get("foo") }.is_a?(Integer)
203
203
  insist { subject.get("bar") } == 454.33
204
204
  insist { subject.get("bar") }.is_a?(Float)
205
205
  end
@@ -412,7 +412,7 @@ describe LogStash::Filters::Grok do
412
412
  filter {
413
413
  grok {
414
414
  match => {
415
- message => "(.*a){30}"
415
+ "message" => "(.*a){30}"
416
416
  }
417
417
  timeout_millis => 100
418
418
  }
@@ -425,6 +425,51 @@ describe LogStash::Filters::Grok do
425
425
  end
426
426
  end
427
427
 
428
+ describe "no timeout on failure with multiple patterns (when timeout not grouped)" do
429
+ config <<-CONFIG
430
+ filter {
431
+ grok {
432
+ match => {
433
+ "message" => [
434
+ "(.*f){20}", "(.*e){20}", "(.*d){20}", "(.*c){20}", "(.*b){20}",
435
+ "(.*a){25}", "(.*a){24}", "(.*a){23}", "(.*a){22}", "(.*a){21}",
436
+ "(.*a){20}"
437
+ ]
438
+ }
439
+ timeout_millis => 500
440
+ timeout_scope => 'pattern'
441
+ }
442
+ }
443
+ CONFIG
444
+
445
+ sample( 'b' * 10 + 'c' * 10 + 'd' * 10 + 'e' * 10 + ' ' + 'a' * 20 ) do
446
+ insist { subject.get("tags") }.nil?
447
+ end
448
+ end
449
+
450
+ describe "timeout on grouped (multi-pattern) failure" do
451
+ config <<-CONFIG
452
+ filter {
453
+ grok {
454
+ match => {
455
+ "message" => [
456
+ "(.*f){20}", "(.*e){20}", "(.*d){20}", "(.*c){20}", "(.*b){20}",
457
+ "(.*a){25}", "(.*a){24}", "(.*a){23}", "(.*a){22}", "(.*a){21}",
458
+ "(.*a){20}"
459
+ ]
460
+ }
461
+ timeout_millis => 500
462
+ timeout_scope => 'event'
463
+ }
464
+ }
465
+ CONFIG
466
+
467
+ sample( 'b' * 10 + 'c' * 10 + 'd' * 10 + 'e' * 10 + ' ' + 'a' * 20 ) do
468
+ expect(subject.get("tags")).to include("_groktimeout")
469
+ expect(subject.get("tags")).not_to include("_grokparsefailure")
470
+ end
471
+ end
472
+
428
473
  describe "tagging on failure" do
429
474
  config <<-CONFIG
430
475
  filter {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-grok
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.1.1
4
+ version: 4.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-08 00:00:00.000000000 Z
11
+ date: 2019-11-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement