logstash-filter-grok 4.0.4 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 16a3453f2bf94d8eb76f5cb9127750ef1e1f7b801d53265e1d862164645b6adf
4
- data.tar.gz: d5f193a61bc62ab63ecab9b5f912fd98418b25ab045998f39769e9c50dcd7938
3
+ metadata.gz: 228ed753ac1ef592e06994285f15a05f8000ef24b70e12b13176f64d9ac9761e
4
+ data.tar.gz: 31c62f48ac6240644c0e4e0eebd55e80944b5f693769f2a204540b4cb05dfed2
5
5
  SHA512:
6
- metadata.gz: b18bb87b598ff1310d0cdd7188f3ab2c07a4888dfb1846bc81d94501c13791e55e106e3d9b1b888bc48ccccfc99a5df40a84f52672d1282b4d1c76e92a4f14e3
7
- data.tar.gz: 397b8f0c2acd590dfab7db3efc5f79579dc64ce39d0c9df99acc5752877db6cb1477abab689737a923eaf2d1a7adb1fc52305390fd1ed6408cda01158e6b3dc8
6
+ metadata.gz: cb7923ffbcc68987bcee124bc4c85155db5e99ecbd74baf9ef27daa1e400f7de0c7072cfcfe460e1697230b4a1adbedeb98d3f5943e3a73d9f0cce5f152a0b63
7
+ data.tar.gz: 5465de021ca4e73f95cd2ee3ca5d45bdf2cbc6e116ef16f436f89231911dd1332aeb26e37d3e16a409a8370c0072389d82ee5dcd69717c73fc2e4032cb1081d0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 4.1.0
2
+ - Changed timeout handling using the Timeout class [#147](https://github.com/logstash-plugins/logstash-filter-grok/pull/147)
3
+
1
4
  ## 4.0.4
2
5
  - Added info and link to documentation for logstash-filter-dissect as another option for extracting unstructured event data into fields
3
6
  [#144](https://github.com/logstash-plugins/logstash-filter-grok/issues/144)
@@ -5,6 +5,7 @@
5
5
  require "logstash/patterns/core"
6
6
  require "grok-pure" # rubygem 'jls-grok'
7
7
  require "set"
8
+ require "timeout"
8
9
 
9
10
  # Parse arbitrary text and structure it.
10
11
  #
@@ -144,8 +145,6 @@
144
145
  #
145
146
  class LogStash::Filters::Grok < LogStash::Filters::Base
146
147
  config_name "grok"
147
- require "logstash/filters/grok/timeout_enforcer"
148
- require "logstash/filters/grok/timeout_exception"
149
148
 
150
149
  # A hash of matches of field => value
151
150
  #
@@ -237,8 +236,6 @@
237
236
  # will be parsed and `hello world` will overwrite the original message.
238
237
  config :overwrite, :validate => :array, :default => []
239
238
 
240
- attr_reader :timeout_enforcer
241
-
242
239
  # Register default pattern paths
243
240
  @@patterns_path ||= Set.new
244
241
  @@patterns_path += [
@@ -246,14 +243,10 @@
246
243
  LogStash::Environment.pattern_path("*")
247
244
  ]
248
245
 
249
- public
250
246
  def register
251
247
  # a cache of capture name handler methods.
252
248
  @handlers = {}
253
249
 
254
- @timeout_enforcer = TimeoutEnforcer.new(@logger, @timeout_millis * 1000000)
255
- @timeout_enforcer.start! unless @timeout_millis == 0
256
-
257
250
  @patternfiles = []
258
251
 
259
252
  # Have @@patterns_path show first. Last-in pattern definitions win; this
@@ -284,9 +277,13 @@
284
277
  end # @match.each
285
278
  @match_counter = metric.counter(:matches)
286
279
  @failure_counter = metric.counter(:failures)
280
+
281
+ # divide by float to allow fractional seconds; the Timeout class timeout value is in seconds but the underlying
282
+ # executor resolution is in microseconds, so a fractional second parameter down to microseconds is possible.
283
+ # see https://github.com/jruby/jruby/blob/9.2.7.0/core/src/main/java/org/jruby/ext/timeout/Timeout.java#L125
284
+ @timeout_seconds = @timeout_millis / 1000.0
287
285
  end # def register
288
286
 
289
- public
290
287
  def filter(event)
291
288
  matched = false
292
289
 
@@ -309,13 +306,17 @@
309
306
  end
310
307
 
311
308
  @logger.debug? and @logger.debug("Event now: ", :event => event)
312
- rescue ::LogStash::Filters::Grok::TimeoutException => e
309
+ rescue GrokTimeoutException => e
313
310
  @logger.warn(e.message)
314
311
  metric.increment(:timeouts)
315
312
  event.tag(@tag_on_timeout)
316
313
  end # def filter
317
314
 
315
+ def close
316
+ end
317
+
318
318
  private
319
+
319
320
  def match(groks, field, event)
320
321
  input = event.get(field)
321
322
  if input.is_a?(Array)
@@ -331,15 +332,13 @@
331
332
  @logger.warn("Grok regexp threw exception", :exception => e.message, :backtrace => e.backtrace, :class => e.class.name)
332
333
  return false
333
334
  end
334
-
335
- private
335
+
336
336
  def match_against_groks(groks, field, input, event)
337
337
  input = input.to_s
338
338
  matched = false
339
339
  groks.each do |grok|
340
340
  # Convert anything else to string (number, hash, etc)
341
-
342
- matched = @timeout_enforcer.grok_till_timeout(grok, field, input)
341
+ matched = grok_till_timeout(grok, field, input)
343
342
  if matched
344
343
  grok.capture(matched) {|field, value| handle(field, value, event)}
345
344
  break if @break_on_match
@@ -349,7 +348,14 @@
349
348
  matched
350
349
  end
351
350
 
352
- private
351
+ def grok_till_timeout(grok, field, value)
352
+ begin
353
+ @timeout_seconds > 0.0 ? Timeout.timeout(@timeout_seconds, TimeoutError) { grok.execute(value) } : grok.execute(value)
354
+ rescue TimeoutError
355
+ raise GrokTimeoutException.new(grok, field, value)
356
+ end
357
+ end
358
+
353
359
  def handle(field, value, event)
354
360
  return if (value.nil? || (value.is_a?(String) && value.empty?)) unless @keep_empty_captures
355
361
 
@@ -373,7 +379,6 @@
373
379
  end
374
380
  end
375
381
 
376
- private
377
382
  def patterns_files_from_paths(paths, glob)
378
383
  patternfiles = []
379
384
  @logger.debug("Grok patterns path", :paths => paths)
@@ -394,7 +399,6 @@
394
399
  patternfiles
395
400
  end # def patterns_files_from_paths
396
401
 
397
- private
398
402
  def add_patterns_from_files(paths, grok)
399
403
  paths.each do |path|
400
404
  if !File.exists?(path)
@@ -404,7 +408,6 @@
404
408
  end
405
409
  end # def add_patterns_from_files
406
410
 
407
- private
408
411
  def add_patterns_from_inline_definition(pattern_definitions, grok)
409
412
  pattern_definitions.each do |name, pattern|
410
413
  next if pattern.nil?
@@ -412,8 +415,27 @@
412
415
  end
413
416
  end
414
417
 
415
- def close
416
- @timeout_enforcer.stop!
417
- end
418
+ class TimeoutError < RuntimeError; end
419
+
420
+ class GrokTimeoutException < Exception
421
+ attr_reader :grok, :field, :value
418
422
 
423
+ def initialize(grok, field, value)
424
+ @grok = grok
425
+ @field = field
426
+ @value = value
427
+ end
428
+
429
+ def message
430
+ "Timeout executing grok '#{@grok.pattern}' against field '#{field}' with value '#{trunc_value}'!"
431
+ end
432
+
433
+ def trunc_value
434
+ if value.size <= 255 # If no more than 255 chars
435
+ value
436
+ else
437
+ "Value too large to output (#{value.bytesize} bytes)! First 255 chars are: #{value[0..255]}"
438
+ end
439
+ end
440
+ end
419
441
  end # class LogStash::Filters::Grok
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-grok'
4
- s.version = '4.0.4'
4
+ s.version = '4.1.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Parses unstructured event data into fields"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -850,26 +850,9 @@ describe LogStash::Filters::Grok do
850
850
  plugin.register
851
851
  end
852
852
 
853
- it "should start the timeout enforcer" do
854
- expect(plugin.timeout_enforcer.running).to be true
855
- end
856
-
857
- context "with the timeout enforcer disabled" do
858
- let(:config) { super.merge("timeout_millis" => 0) }
859
-
860
- it "should not start the timeout enforcer" do
861
- expect(plugin.timeout_enforcer.running).to be false
862
- end
863
- end
864
-
865
853
  it "should close cleanly" do
866
854
  expect { plugin.do_close }.not_to raise_error
867
855
  end
868
-
869
- it "should stop the timeout enforcer" do
870
- plugin.do_close
871
- expect(plugin.timeout_enforcer.running).to be false
872
- end
873
856
  end
874
857
 
875
858
  describe "after grok when the event is JSON serialised the field values are unchanged" do
@@ -929,4 +912,37 @@ describe LogStash::Filters::Grok do
929
912
  end
930
913
  end
931
914
 
932
- end
915
+
916
+ describe "direct plugin testing" do
917
+ subject do
918
+ plugin = LogStash::Filters::Grok.new(options)
919
+ plugin.register
920
+ plugin
921
+ end
922
+
923
+ let(:data) { {"message" => message} }
924
+ let(:event) { LogStash::Event.new(data) }
925
+
926
+ context 'when timeouts are explicitly disabled' do
927
+ let(:options) do
928
+ {
929
+ "timeout_millis" => 0
930
+ }
931
+ end
932
+
933
+ context 'when given a pathological input' do
934
+ let(:message) { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
935
+ let(:options) { super().merge("match" => { "message" => "(.*a){30}" }) }
936
+
937
+ it 'blocks for at least 3 seconds' do
938
+ blocking_exception_class = Class.new(::Exception) # avoid RuntimeError
939
+ expect do
940
+ Timeout.timeout(3, blocking_exception_class) do
941
+ subject.filter(event)
942
+ end
943
+ end.to raise_exception(blocking_exception_class)
944
+ end
945
+ end
946
+ end
947
+ end
948
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-grok
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.4
4
+ version: 4.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-19 00:00:00.000000000 Z
11
+ date: 2019-07-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -116,8 +116,6 @@ files:
116
116
  - README.md
117
117
  - docs/index.asciidoc
118
118
  - lib/logstash/filters/grok.rb
119
- - lib/logstash/filters/grok/timeout_enforcer.rb
120
- - lib/logstash/filters/grok/timeout_exception.rb
121
119
  - logstash-filter-grok.gemspec
122
120
  - spec/filters/grok_spec.rb
123
121
  homepage: http://www.elastic.co/guide/en/logstash/current/index.html
@@ -1,72 +0,0 @@
1
- class LogStash::Filters::Grok::TimeoutEnforcer
2
- def initialize(logger, timeout_nanos)
3
- @logger = logger
4
- @running = java.util.concurrent.atomic.AtomicBoolean.new(false)
5
- @timeout_nanos = timeout_nanos
6
-
7
- # Stores running matches with their start time, this is used to cancel long running matches
8
- # Is a map of Thread => start_time
9
- @threads_to_start_time = java.util.concurrent.ConcurrentHashMap.new
10
- end
11
-
12
- def running
13
- @running.get()
14
- end
15
-
16
- def grok_till_timeout(grok, field, value)
17
- begin
18
- thread = java.lang.Thread.currentThread()
19
- @threads_to_start_time.put(thread, java.lang.System.nanoTime)
20
- grok.execute(value)
21
- rescue InterruptedRegexpError, java.lang.InterruptedException => e
22
- raise ::LogStash::Filters::Grok::TimeoutException.new(grok, field, value)
23
- ensure
24
- # If the regexp finished, but interrupt was called after, we'll want to
25
- # clear the interrupted status anyway
26
- @threads_to_start_time.remove(thread)
27
- thread.interrupted
28
- end
29
- end
30
-
31
- def start!
32
- @running.set(true)
33
- @timer_thread = Thread.new do
34
- while @running.get()
35
- begin
36
- cancel_timed_out!
37
- rescue Exception => e
38
- @logger.error("Error while attempting to check/cancel excessively long grok patterns",
39
- :message => e.message,
40
- :class => e.class.name,
41
- :backtrace => e.backtrace
42
- )
43
- end
44
- sleep 0.25
45
- end
46
- end
47
- end
48
-
49
- def stop!
50
- @running.set(false)
51
- # Check for the thread mostly for a fast start/shutdown scenario
52
- @timer_thread.join if @timer_thread
53
- end
54
-
55
- private
56
-
57
- def cancel_timed_out!
58
- now = java.lang.System.nanoTime # save ourselves some nanotime calls
59
- @threads_to_start_time.keySet.each do |thread|
60
- # Use compute to lock this value
61
- @threads_to_start_time.computeIfPresent(thread) do |thread, start_time|
62
- if start_time < now && now - start_time > @timeout_nanos
63
- thread.interrupt
64
- nil # Delete the key
65
- else
66
- start_time # preserve the key
67
- end
68
- end
69
- end
70
- end
71
-
72
- end
@@ -1,21 +0,0 @@
1
- class LogStash::Filters::Grok::TimeoutException < Exception
2
- attr_reader :grok, :field, :value
3
-
4
- def initialize(grok=nil, field=nil, value=nil)
5
- @field = field
6
- @value = value
7
- @grok = grok
8
- end
9
-
10
- def message
11
- "Timeout executing grok '#{@grok.pattern}' against field '#{field}' with value '#{trunc_value}'!"
12
- end
13
-
14
- def trunc_value
15
- if value.size <= 255 # If no more than 255 chars
16
- value
17
- else
18
- "Value too large to output (#{value.bytesize} bytes)! First 255 chars are: #{value[0..255]}"
19
- end
20
- end
21
- end