logstash-filter-grok 4.1.1 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/docs/index.asciidoc +17 -0
- data/lib/logstash/filters/grok.rb +109 -21
- data/logstash-filter-grok.gemspec +1 -1
- data/spec/filters/grok_spec.rb +47 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9a139ac82c3147d778c2f4510cf6ab077c6c4e73adeae87a0b4058623c4a0619
|
4
|
+
data.tar.gz: 02300cea3c6a17e10947cf63a7dae596c1ef1fc1c5114f3834ac784536a2941a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e822b6f1aca31141d7c3d553167a27ea1c11ba41e39dd9553fdbecfa3472780ccca75054b2247985261634efb3263a62fb7ca87f5762e418f744ef45bf994889
|
7
|
+
data.tar.gz: 990df0541f9a5cdb09d6fea2060c1c68e901e7cdf1cdb0fd45cfcf32dfa0710da3eeddd7fb3400441489ccc1741f0380daf5934b15e0fbba9ccc2d4a6b370c37
|
data/CHANGELOG.md
CHANGED
data/docs/index.asciidoc
CHANGED
@@ -195,6 +195,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
195
195
|
| <<plugins-{type}s-{plugin}-tag_on_failure>> |<<array,array>>|No
|
196
196
|
| <<plugins-{type}s-{plugin}-tag_on_timeout>> |<<string,string>>|No
|
197
197
|
| <<plugins-{type}s-{plugin}-timeout_millis>> |<<number,number>>|No
|
198
|
+
| <<plugins-{type}s-{plugin}-timeout_scope>> |<<string,string>>|No
|
198
199
|
|=======================================================================
|
199
200
|
|
200
201
|
Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
|
@@ -356,6 +357,22 @@ This will never timeout early, but may take a little longer to timeout.
|
|
356
357
|
Actual timeout is approximate based on a 250ms quantization.
|
357
358
|
Set to 0 to disable timeouts
|
358
359
|
|
360
|
+
[id="plugins-{type}s-{plugin}-timeout_scope"]
|
361
|
+
===== `timeout_scope`
|
362
|
+
|
363
|
+
* Value type is <<string,string>>
|
364
|
+
* Default value is `"pattern"`
|
365
|
+
* Supported values are `"pattern"` and `"event"`
|
366
|
+
|
367
|
+
When multiple patterns are provided to <<plugins-{type}s-{plugin}-match>>,
|
368
|
+
the timeout has historically applied to _each_ pattern, incurring overhead
|
369
|
+
for each and every pattern that is attempted; when the grok filter is
|
370
|
+
configured with `timeout_scope => event`, the plugin instead enforces
|
371
|
+
a single timeout across all attempted matches on the event, so it can
|
372
|
+
achieve similar safeguard against runaway matchers with significantly
|
373
|
+
less overhead.
|
374
|
+
|
375
|
+
It's usually better to scope the timeout for the whole event.
|
359
376
|
|
360
377
|
|
361
378
|
[id="plugins-{type}s-{plugin}-common-options"]
|
data/lib/logstash/filters/grok.rb
CHANGED
@@ -140,7 +140,7 @@
|
|
140
140
|
# `SYSLOGBASE` pattern which itself is defined by other patterns.
|
141
141
|
#
|
142
142
|
# Another option is to define patterns _inline_ in the filter using `pattern_definitions`.
|
143
|
-
# This is mostly for convenience and allows user to define a pattern which can be used just in that
|
143
|
+
# This is mostly for convenience and allows user to define a pattern which can be used just in that
|
144
144
|
# filter. This newly defined patterns in `pattern_definitions` will not be available outside of that particular `grok` filter.
|
145
145
|
#
|
146
146
|
class LogStash::Filters::Grok < LogStash::Filters::Base
|
@@ -168,7 +168,7 @@
|
|
168
168
|
# necessarily need to define this yourself unless you are adding additional
|
169
169
|
# patterns. You can point to multiple pattern directories using this setting.
|
170
170
|
# Note that Grok will read all files in the directory matching the patterns_files_glob
|
171
|
-
# and assume it's a pattern file (including any tilde backup files).
|
171
|
+
# and assume it's a pattern file (including any tilde backup files).
|
172
172
|
# [source,ruby]
|
173
173
|
# patterns_dir => ["/opt/logstash/patterns", "/opt/logstash/extra_patterns"]
|
174
174
|
#
|
@@ -215,6 +215,16 @@
|
|
215
215
|
# Set to 0 to disable timeouts
|
216
216
|
config :timeout_millis, :validate => :number, :default => 30000
|
217
217
|
|
218
|
+
# When multiple patterns are provided to `match`,
|
219
|
+
# the timeout has historically applied to _each_ pattern, incurring overhead
|
220
|
+
# for each and every pattern that is attempted; when the grok filter is
|
221
|
+
# configured with `timeout_scope => 'event'`, the plugin instead enforces
|
222
|
+
# a single timeout across all attempted matches on the event, so it can
|
223
|
+
# achieve similar safeguard against runaway matchers with significantly
|
224
|
+
# less overhead.
|
225
|
+
# It's usually better to scope the timeout for the whole event.
|
226
|
+
config :timeout_scope, :validate => %w(pattern event), :default => "pattern"
|
227
|
+
|
218
228
|
# Tag to apply if a grok regexp times out.
|
219
229
|
config :tag_on_timeout, :validate => :string, :default => '_groktimeout'
|
220
230
|
|
@@ -278,10 +288,8 @@
|
|
278
288
|
@match_counter = metric.counter(:matches)
|
279
289
|
@failure_counter = metric.counter(:failures)
|
280
290
|
|
281
|
-
|
282
|
-
|
283
|
-
# see https://github.com/jruby/jruby/blob/9.2.7.0/core/src/main/java/org/jruby/ext/timeout/Timeout.java#L125
|
284
|
-
@timeout_seconds = @timeout_millis / 1000.0
|
291
|
+
@timeout = @timeout_millis > 0.0 ? RubyTimeout.new(@timeout_millis) : NoopTimeout::INSTANCE
|
292
|
+
@matcher = ( @timeout_scope.eql?('event') ? EventTimeoutMatcher : PatternTimeoutMatcher ).new(self)
|
285
293
|
end # def register
|
286
294
|
|
287
295
|
def filter(event)
|
@@ -334,25 +342,56 @@
|
|
334
342
|
end
|
335
343
|
|
336
344
|
def match_against_groks(groks, field, input, event)
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
+
# Convert anything else to string (number, hash, etc)
|
346
|
+
context = GrokContext.new(field, input.to_s)
|
347
|
+
@matcher.match(context, groks, event, @break_on_match)
|
348
|
+
end
|
349
|
+
|
350
|
+
# Internal (base) helper to handle the global timeout switch.
|
351
|
+
# @private
|
352
|
+
class Matcher
|
353
|
+
|
354
|
+
def initialize(filter)
|
355
|
+
@filter = filter
|
356
|
+
end
|
357
|
+
|
358
|
+
def match(context, groks, event, break_on_match)
|
359
|
+
matched = false
|
360
|
+
|
361
|
+
groks.each do |grok|
|
362
|
+
context.set_grok(grok)
|
363
|
+
|
364
|
+
matched = execute(context, grok)
|
365
|
+
if matched
|
366
|
+
grok.capture(matched) { |field, value| @filter.handle(field, value, event) }
|
367
|
+
break if break_on_match
|
368
|
+
end
|
345
369
|
end
|
370
|
+
|
371
|
+
matched
|
372
|
+
end
|
373
|
+
|
374
|
+
protected
|
375
|
+
|
376
|
+
def execute(context, grok)
|
377
|
+
grok.execute(context.input)
|
346
378
|
end
|
347
|
-
|
348
|
-
matched
|
379
|
+
|
349
380
|
end
|
350
381
|
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
382
|
+
# @private
|
383
|
+
class EventTimeoutMatcher < Matcher
|
384
|
+
# @override
|
385
|
+
def match(context, groks, event, break_on_match)
|
386
|
+
@filter.with_timeout(context) { super }
|
387
|
+
end
|
388
|
+
end
|
389
|
+
|
390
|
+
# @private
|
391
|
+
class PatternTimeoutMatcher < Matcher
|
392
|
+
# @override
|
393
|
+
def execute(context, grok)
|
394
|
+
@filter.with_timeout(context) { super }
|
356
395
|
end
|
357
396
|
end
|
358
397
|
|
@@ -378,6 +417,7 @@
|
|
378
417
|
end
|
379
418
|
end
|
380
419
|
end
|
420
|
+
public :handle
|
381
421
|
|
382
422
|
def patterns_files_from_paths(paths, glob)
|
383
423
|
patternfiles = []
|
@@ -438,4 +478,52 @@
|
|
438
478
|
end
|
439
479
|
end
|
440
480
|
end
|
481
|
+
|
482
|
+
def with_timeout(context, &block)
|
483
|
+
@timeout.exec(&block)
|
484
|
+
rescue TimeoutError => error
|
485
|
+
handle_timeout(context, error)
|
486
|
+
end
|
487
|
+
public :with_timeout
|
488
|
+
|
489
|
+
def handle_timeout(context, error)
|
490
|
+
raise GrokTimeoutException.new(context.grok, context.field, context.input)
|
491
|
+
end
|
492
|
+
|
493
|
+
# @private
|
494
|
+
class GrokContext
|
495
|
+
attr_reader :grok, :field, :input
|
496
|
+
|
497
|
+
def initialize(field, input)
|
498
|
+
@field = field
|
499
|
+
@input = input
|
500
|
+
end
|
501
|
+
|
502
|
+
def set_grok(grok)
|
503
|
+
@grok = grok
|
504
|
+
end
|
505
|
+
end
|
506
|
+
|
507
|
+
# @private
|
508
|
+
class NoopTimeout
|
509
|
+
INSTANCE = new
|
510
|
+
|
511
|
+
def exec
|
512
|
+
yield
|
513
|
+
end
|
514
|
+
end
|
515
|
+
|
516
|
+
# @private
|
517
|
+
class RubyTimeout
|
518
|
+
def initialize(timeout_millis)
|
519
|
+
# divide by float to allow fractional seconds, the Timeout class timeout value is in seconds but the underlying
|
520
|
+
# executor resolution is in microseconds so fractional second parameter down to microseconds is possible.
|
521
|
+
# see https://github.com/jruby/jruby/blob/9.2.7.0/core/src/main/java/org/jruby/ext/timeout/Timeout.java#L125
|
522
|
+
@timeout_seconds = timeout_millis / 1000.0
|
523
|
+
end
|
524
|
+
|
525
|
+
def exec(&block)
|
526
|
+
Timeout.timeout(@timeout_seconds, TimeoutError, &block)
|
527
|
+
end
|
528
|
+
end
|
441
529
|
end # class LogStash::Filters::Grok
|
data/logstash-filter-grok.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-filter-grok'
|
4
|
-
s.version = '4.1.1'
|
4
|
+
s.version = '4.2.0'
|
5
5
|
s.licenses = ['Apache License (2.0)']
|
6
6
|
s.summary = "Parses unstructured event data into fields"
|
7
7
|
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
|
data/spec/filters/grok_spec.rb
CHANGED
@@ -199,7 +199,7 @@ describe LogStash::Filters::Grok do
|
|
199
199
|
|
200
200
|
sample "400 454.33" do
|
201
201
|
insist { subject.get("foo") } == 400
|
202
|
-
insist { subject.get("foo") }.is_a?(Fixnum)
|
202
|
+
insist { subject.get("foo") }.is_a?(Integer)
|
203
203
|
insist { subject.get("bar") } == 454.33
|
204
204
|
insist { subject.get("bar") }.is_a?(Float)
|
205
205
|
end
|
@@ -412,7 +412,7 @@ describe LogStash::Filters::Grok do
|
|
412
412
|
filter {
|
413
413
|
grok {
|
414
414
|
match => {
|
415
|
-
message => "(.*a){30}"
|
415
|
+
"message" => "(.*a){30}"
|
416
416
|
}
|
417
417
|
timeout_millis => 100
|
418
418
|
}
|
@@ -425,6 +425,51 @@ describe LogStash::Filters::Grok do
|
|
425
425
|
end
|
426
426
|
end
|
427
427
|
|
428
|
+
describe "no timeout on failure with multiple patterns (when timeout not grouped)" do
|
429
|
+
config <<-CONFIG
|
430
|
+
filter {
|
431
|
+
grok {
|
432
|
+
match => {
|
433
|
+
"message" => [
|
434
|
+
"(.*f){20}", "(.*e){20}", "(.*d){20}", "(.*c){20}", "(.*b){20}",
|
435
|
+
"(.*a){25}", "(.*a){24}", "(.*a){23}", "(.*a){22}", "(.*a){21}",
|
436
|
+
"(.*a){20}"
|
437
|
+
]
|
438
|
+
}
|
439
|
+
timeout_millis => 500
|
440
|
+
timeout_scope => 'pattern'
|
441
|
+
}
|
442
|
+
}
|
443
|
+
CONFIG
|
444
|
+
|
445
|
+
sample( 'b' * 10 + 'c' * 10 + 'd' * 10 + 'e' * 10 + ' ' + 'a' * 20 ) do
|
446
|
+
insist { subject.get("tags") }.nil?
|
447
|
+
end
|
448
|
+
end
|
449
|
+
|
450
|
+
describe "timeout on grouped (multi-pattern) failure" do
|
451
|
+
config <<-CONFIG
|
452
|
+
filter {
|
453
|
+
grok {
|
454
|
+
match => {
|
455
|
+
"message" => [
|
456
|
+
"(.*f){20}", "(.*e){20}", "(.*d){20}", "(.*c){20}", "(.*b){20}",
|
457
|
+
"(.*a){25}", "(.*a){24}", "(.*a){23}", "(.*a){22}", "(.*a){21}",
|
458
|
+
"(.*a){20}"
|
459
|
+
]
|
460
|
+
}
|
461
|
+
timeout_millis => 500
|
462
|
+
timeout_scope => 'event'
|
463
|
+
}
|
464
|
+
}
|
465
|
+
CONFIG
|
466
|
+
|
467
|
+
sample( 'b' * 10 + 'c' * 10 + 'd' * 10 + 'e' * 10 + ' ' + 'a' * 20 ) do
|
468
|
+
expect(subject.get("tags")).to include("_groktimeout")
|
469
|
+
expect(subject.get("tags")).not_to include("_grokparsefailure")
|
470
|
+
end
|
471
|
+
end
|
472
|
+
|
428
473
|
describe "tagging on failure" do
|
429
474
|
config <<-CONFIG
|
430
475
|
filter {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-grok
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.1.1
|
4
|
+
version: 4.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-11-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|