logstash-filter-aggregate 2.7.2 → 2.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -1
- data/LICENSE +1 -1
- data/docs/index.asciidoc +37 -6
- data/lib/logstash/filters/aggregate.rb +104 -47
- data/logstash-filter-aggregate.gemspec +1 -1
- data/spec/filters/aggregate_spec.rb +22 -1
- data/spec/filters/aggregate_spec_helper.rb +4 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8cbb27a58c0339ae9f7908d18250a5f91c456f6f
|
4
|
+
data.tar.gz: e86b38aa410918fb62cf7050be5fd8b9f44d6ed9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d14e1e14cdf0342db92e06ceac124f4c0d384183e4074f7da55b87e27d117d4feddc63844b3a7b06b196d704154a44005bae18e8b3f135d8008f877ef6bafe76
|
7
|
+
data.tar.gz: 090d1e070fab264a8f4b1c7150e6d4161e357ab146cc0303480cc6ee0aebdcba80659f9a7dba02ffadf0939f47140a7038f83aa571b9053e2cd9d1fb58a2b91f
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 2.8.0
|
2
|
+
- new feature: add 'timeout_timestamp_field' option.
|
3
|
+
When set, this option allows computing the timeout based on an event timestamp field (and not system time). It's particularly useful when processing old logs.
|
4
|
+
|
1
5
|
## 2.7.2
|
2
6
|
- bugfix: fix synchronisation issue at Logstash shutdown (#75)
|
3
7
|
|
@@ -24,7 +28,8 @@
|
|
24
28
|
- docs: bump patch level for doc build
|
25
29
|
|
26
30
|
## 2.6.0
|
27
|
-
- new feature: 'inactivity_timeout'
|
31
|
+
- new feature: add 'inactivity_timeout' option.
|
32
|
+
Events for a given `task_id` will be aggregated for as long as they keep arriving within the defined `inactivity_timeout` option - the inactivity timeout is reset each time a new event happens. On the contrary, `timeout` is never reset and happens after `timeout` seconds since aggregation map creation.
|
28
33
|
|
29
34
|
## 2.5.2
|
30
35
|
- bugfix: fix 'aggregate_maps_path' load (issue #62). Re-start of Logstash died when no data were provided in 'aggregate_maps_path' file for some aggregate task_id patterns
|
data/LICENSE
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Copyright (c) 2012-
|
1
|
+
Copyright (c) 2012-2018 Elasticsearch <http://www.elasticsearch.org>
|
2
2
|
|
3
3
|
Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
you may not use this file except in compliance with the License.
|
data/docs/index.asciidoc
CHANGED
@@ -328,7 +328,7 @@ filter {
|
|
328
328
|
* an aggregate map is tied to one task_id value which is tied to one task_id pattern. So if you have 2 filters with different task_id patterns, even if you have same task_id value, they won't share the same aggregate map.
|
329
329
|
* in one filter configuration, it is recommended to define a timeout option to protect the feature against unterminated tasks. It tells the filter to delete expired maps
|
330
330
|
* if no timeout is defined, by default, all maps older than 1800 seconds are automatically deleted
|
331
|
-
* all timeout options have to be defined in only one aggregate filter per task_id pattern (per pipeline). Timeout options are : timeout, inactivity_timeout, timeout_code, push_map_as_event_on_timeout, push_previous_map_as_event, timeout_task_id_field, timeout_tags
|
331
|
+
* all timeout options have to be defined in only one aggregate filter per task_id pattern (per pipeline). Timeout options are : timeout, inactivity_timeout, timeout_code, push_map_as_event_on_timeout, push_previous_map_as_event, timeout_timestamp_field, timeout_task_id_field, timeout_tags
|
332
332
|
* if `code` execution raises an exception, the error is logged and event is tagged '_aggregateexception'
|
333
333
|
|
334
334
|
|
@@ -362,6 +362,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
362
362
|
| <<plugins-{type}s-{plugin}-timeout_code>> |<<string,string>>|No
|
363
363
|
| <<plugins-{type}s-{plugin}-timeout_tags>> |<<array,array>>|No
|
364
364
|
| <<plugins-{type}s-{plugin}-timeout_task_id_field>> |<<string,string>>|No
|
365
|
+
| <<plugins-{type}s-{plugin}-timeout_timestamp_field>> |<<string,string>>|No
|
365
366
|
|=======================================================================
|
366
367
|
|
367
368
|
Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
|
@@ -532,7 +533,7 @@ Example:
|
|
532
533
|
[source,ruby]
|
533
534
|
filter {
|
534
535
|
aggregate {
|
535
|
-
timeout_tags => ["aggregate_timeout
|
536
|
+
timeout_tags => ["aggregate_timeout"]
|
536
537
|
}
|
537
538
|
}
|
538
539
|
|
@@ -542,13 +543,43 @@ Example:
|
|
542
543
|
* Value type is <<string,string>>
|
543
544
|
* There is no default value for this setting.
|
544
545
|
|
545
|
-
This option indicates the timeout generated event's field
|
546
|
-
|
547
|
-
|
548
|
-
For example, with option `timeout_task_id_field => "my_id"` ,when timeout task id is `"12345"`, the generated timeout event will contain `'my_id' => '12345'`.
|
546
|
+
This option indicates the timeout generated event's field where the current "task_id" value will be set.
|
547
|
+
This can help to correlate which tasks have been timed out.
|
549
548
|
|
550
549
|
By default, if this option is not set, task id value won't be set into timeout generated event.
|
551
550
|
|
551
|
+
Example:
|
552
|
+
[source,ruby]
|
553
|
+
filter {
|
554
|
+
aggregate {
|
555
|
+
timeout_task_id_field => "task_id"
|
556
|
+
}
|
557
|
+
}
|
558
|
+
|
559
|
+
[id="plugins-{type}s-{plugin}-timeout_timestamp_field"]
|
560
|
+
===== `timeout_timestamp_field`
|
561
|
+
|
562
|
+
* Value type is <<string,string>>
|
563
|
+
* There is no default value for this setting.
|
564
|
+
|
565
|
+
By default, timeout is computed using system time, where Logstash is running.
|
566
|
+
|
567
|
+
When this option is set, the timeout is computed using the event timestamp field indicated in this option.
|
568
|
+
It means that when a first event arrives on aggregate filter and induces a map creation, map creation time will be equal to this event timestamp.
|
569
|
+
Then, each time a new event arrives on aggregate filter, event timestamp is compared to map creation time to check if timeout happened.
|
570
|
+
|
571
|
+
This option is particularly useful when processing old logs with option `push_map_as_event_on_timeout => true`.
|
572
|
+
It allows generating aggregated events based on timeout from old logs, where system time is inappropriate.
|
573
|
+
|
574
|
+
Warning: for this option to work properly, it must be set on the first aggregate filter.
|
575
|
+
|
576
|
+
Example:
|
577
|
+
[source,ruby]
|
578
|
+
filter {
|
579
|
+
aggregate {
|
580
|
+
timeout_timestamp_field => "@timestamp"
|
581
|
+
}
|
582
|
+
}
|
552
583
|
|
553
584
|
|
554
585
|
[id="plugins-{type}s-{plugin}-common-options"]
|
@@ -36,6 +36,8 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
36
36
|
|
37
37
|
config :push_previous_map_as_event, :validate => :boolean, :required => false, :default => false
|
38
38
|
|
39
|
+
config :timeout_timestamp_field, :validate => :string, :required => false
|
40
|
+
|
39
41
|
config :timeout_task_id_field, :validate => :string, :required => false
|
40
42
|
|
41
43
|
config :timeout_tags, :validate => :array, :required => false, :default => []
|
@@ -44,7 +46,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
44
46
|
# ################## #
|
45
47
|
# INSTANCE VARIABLES #
|
46
48
|
# ################## #
|
47
|
-
|
49
|
+
|
48
50
|
|
49
51
|
# pointer to current pipeline context
|
50
52
|
attr_accessor :current_pipeline
|
@@ -57,7 +59,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
57
59
|
|
58
60
|
# Default timeout (in seconds) when not defined in plugin configuration
|
59
61
|
DEFAULT_TIMEOUT = 1800
|
60
|
-
|
62
|
+
|
61
63
|
# Store all shared aggregate attributes per pipeline id
|
62
64
|
@@pipelines = {}
|
63
65
|
|
@@ -77,7 +79,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
77
79
|
if !@task_id.match(/%\{.+\}/)
|
78
80
|
raise LogStash::ConfigurationError, "Aggregate plugin: task_id pattern '#{@task_id}' must contain a dynamic expression like '%{field}'"
|
79
81
|
end
|
80
|
-
|
82
|
+
|
81
83
|
# process lambda expression to call in each filter call
|
82
84
|
eval("@codeblock = lambda { |event, map| #{@code} }", binding, "(aggregate filter code)")
|
83
85
|
|
@@ -87,7 +89,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
87
89
|
end
|
88
90
|
|
89
91
|
# init pipeline context
|
90
|
-
@@pipelines[pipeline_id] ||= LogStash::Filters::Aggregate::Pipeline.new()
|
92
|
+
@@pipelines[pipeline_id] ||= LogStash::Filters::Aggregate::Pipeline.new()
|
91
93
|
@current_pipeline = @@pipelines[pipeline_id]
|
92
94
|
|
93
95
|
@current_pipeline.mutex.synchronize do
|
@@ -103,23 +105,23 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
103
105
|
end
|
104
106
|
|
105
107
|
# timeout management : define default_timeout
|
106
|
-
if
|
108
|
+
if @timeout && (@current_pipeline.default_timeout.nil? || @timeout < @current_pipeline.default_timeout)
|
107
109
|
@current_pipeline.default_timeout = @timeout
|
108
110
|
@logger.debug("Aggregate default timeout: #{@timeout} seconds")
|
109
111
|
end
|
110
112
|
|
111
113
|
# inactivity timeout management: make sure it is lower than timeout
|
112
|
-
if
|
114
|
+
if @inactivity_timeout && ((@timeout && @inactivity_timeout > @timeout) || (@current_pipeline.default_timeout && @inactivity_timeout > @current_pipeline.default_timeout))
|
113
115
|
raise LogStash::ConfigurationError, "Aggregate plugin: For task_id pattern #{@task_id}, inactivity_timeout must be lower than timeout"
|
114
116
|
end
|
115
117
|
|
116
118
|
# reinit pipeline_close_instance (if necessary)
|
117
|
-
if !@current_pipeline.aggregate_maps_path_set &&
|
119
|
+
if !@current_pipeline.aggregate_maps_path_set && @current_pipeline.pipeline_close_instance
|
118
120
|
@current_pipeline.pipeline_close_instance = nil
|
119
121
|
end
|
120
122
|
|
121
123
|
# check if aggregate_maps_path option has already been set on another instance else set @current_pipeline.aggregate_maps_path_set
|
122
|
-
if
|
124
|
+
if @aggregate_maps_path
|
123
125
|
if @current_pipeline.aggregate_maps_path_set
|
124
126
|
@current_pipeline.aggregate_maps_path_set = false
|
125
127
|
raise LogStash::ConfigurationError, "Aggregate plugin: Option 'aggregate_maps_path' must be set on only one aggregate filter"
|
@@ -130,7 +132,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
130
132
|
end
|
131
133
|
|
132
134
|
# load aggregate maps from file (if option defined)
|
133
|
-
if
|
135
|
+
if @aggregate_maps_path && File.exist?(@aggregate_maps_path)
|
134
136
|
File.open(@aggregate_maps_path, "r") { |from_file| @current_pipeline.aggregate_maps.merge!(Marshal.load(from_file)) }
|
135
137
|
File.delete(@aggregate_maps_path)
|
136
138
|
@logger.info("Aggregate maps loaded from : #{@aggregate_maps_path}")
|
@@ -138,8 +140,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
138
140
|
|
139
141
|
# init aggregate_maps
|
140
142
|
@current_pipeline.aggregate_maps[@task_id] ||= {}
|
141
|
-
|
142
|
-
|
143
|
+
|
143
144
|
end
|
144
145
|
end
|
145
146
|
|
@@ -156,7 +157,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
156
157
|
# store aggregate maps to file (if option defined)
|
157
158
|
@current_pipeline.mutex.synchronize do
|
158
159
|
@current_pipeline.aggregate_maps.delete_if { |key, value| value.empty? }
|
159
|
-
if
|
160
|
+
if @aggregate_maps_path && !@current_pipeline.aggregate_maps.empty?
|
160
161
|
File.open(@aggregate_maps_path, "w"){ |to_file| Marshal.dump(@current_pipeline.aggregate_maps, to_file) }
|
161
162
|
@logger.info("Aggregate maps stored to : #{@aggregate_maps_path}")
|
162
163
|
end
|
@@ -182,26 +183,36 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
182
183
|
# protect aggregate_maps against concurrent access, using a mutex
|
183
184
|
@current_pipeline.mutex.synchronize do
|
184
185
|
|
186
|
+
# if timeout is based on event timestamp, check if task_id map is expired and should be removed
|
187
|
+
if @timeout_timestamp_field
|
188
|
+
event_to_yield = remove_expired_map_based_on_event_timestamp(task_id, event)
|
189
|
+
end
|
190
|
+
|
185
191
|
# retrieve the current aggregate map
|
186
192
|
aggregate_maps_element = @current_pipeline.aggregate_maps[@task_id][task_id]
|
187
193
|
|
188
|
-
|
189
|
-
# create aggregate map, if it doesn't exist
|
194
|
+
# case where aggregate map isn't already created
|
190
195
|
if aggregate_maps_element.nil?
|
191
196
|
return if @map_action == "update"
|
197
|
+
|
192
198
|
# create new event from previous map, if @push_previous_map_as_event is enabled
|
193
199
|
if @push_previous_map_as_event && !@current_pipeline.aggregate_maps[@task_id].empty?
|
194
200
|
event_to_yield = extract_previous_map_as_event()
|
195
201
|
end
|
196
|
-
|
202
|
+
|
203
|
+
# create aggregate map
|
204
|
+
creation_timestamp = reference_timestamp(event)
|
205
|
+
aggregate_maps_element = LogStash::Filters::Aggregate::Element.new(creation_timestamp)
|
197
206
|
@current_pipeline.aggregate_maps[@task_id][task_id] = aggregate_maps_element
|
198
207
|
else
|
199
208
|
return if @map_action == "create"
|
200
209
|
end
|
201
|
-
|
210
|
+
|
202
211
|
# update last event timestamp
|
203
|
-
aggregate_maps_element.lastevent_timestamp =
|
212
|
+
aggregate_maps_element.lastevent_timestamp = reference_timestamp(event)
|
213
|
+
|
204
214
|
# execute the code to read/update map and event
|
215
|
+
map = aggregate_maps_element.map
|
205
216
|
begin
|
206
217
|
@codeblock.call(event, map)
|
207
218
|
@logger.debug("Aggregate successful filter code execution", :code => @code)
|
@@ -224,8 +235,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
224
235
|
filter_matched(event) if noError
|
225
236
|
|
226
237
|
# yield previous map as new event if set
|
227
|
-
yield event_to_yield
|
228
|
-
|
238
|
+
yield event_to_yield if event_to_yield
|
229
239
|
end
|
230
240
|
|
231
241
|
# Create a new event from the aggregation_map and the corresponding task_id
|
@@ -279,23 +289,11 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
279
289
|
|
280
290
|
@logger.debug("Aggregate flush call with #{options}")
|
281
291
|
|
282
|
-
#
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
if
|
287
|
-
@current_pipeline.flush_instance_map[@task_id] = self
|
288
|
-
@timeout = @current_pipeline.default_timeout
|
289
|
-
elsif @current_pipeline.flush_instance_map[@task_id].timeout.nil?
|
290
|
-
@current_pipeline.flush_instance_map[@task_id].timeout = @current_pipeline.default_timeout
|
291
|
-
end
|
292
|
-
|
293
|
-
if @current_pipeline.flush_instance_map[@task_id].inactivity_timeout.nil?
|
294
|
-
@current_pipeline.flush_instance_map[@task_id].inactivity_timeout = @current_pipeline.flush_instance_map[@task_id].timeout
|
295
|
-
end
|
296
|
-
|
297
|
-
# Launch timeout management only every interval of (@inactivity_timeout / 2) seconds or at Logstash shutdown
|
298
|
-
if @current_pipeline.flush_instance_map[@task_id] == self && !@current_pipeline.aggregate_maps[@task_id].nil? && (!@current_pipeline.last_flush_timestamp_map.has_key?(@task_id) || Time.now > @current_pipeline.last_flush_timestamp_map[@task_id] + @inactivity_timeout / 2 || options[:final])
|
292
|
+
# init flush/timeout properties for current pipeline
|
293
|
+
init_pipeline_timeout_management()
|
294
|
+
|
295
|
+
# launch timeout management only every interval of (@inactivity_timeout / 2) seconds or at Logstash shutdown
|
296
|
+
if @current_pipeline.flush_instance_map[@task_id] == self && @current_pipeline.aggregate_maps[@task_id] && (!@current_pipeline.last_flush_timestamp_map.has_key?(@task_id) || Time.now > @current_pipeline.last_flush_timestamp_map[@task_id] + @inactivity_timeout / 2 || options[:final])
|
299
297
|
events_to_flush = remove_expired_maps()
|
300
298
|
|
301
299
|
# at Logstash shutdown, if push_previous_map_as_event is enabled, it's important to force flush (particularly for jdbc input plugin)
|
@@ -318,9 +316,32 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
318
316
|
else
|
319
317
|
return []
|
320
318
|
end
|
321
|
-
|
322
319
|
end
|
320
|
+
|
321
|
+
# init flush/timeout properties for current pipeline
|
322
|
+
def init_pipeline_timeout_management()
|
323
|
+
|
324
|
+
# Define default timeout (if not defined by user)
|
325
|
+
if @current_pipeline.default_timeout.nil?
|
326
|
+
@current_pipeline.default_timeout = DEFAULT_TIMEOUT
|
327
|
+
end
|
328
|
+
|
329
|
+
# Define default flush instance that manages timeout (if not defined by user)
|
330
|
+
if !@current_pipeline.flush_instance_map.has_key?(@task_id)
|
331
|
+
@current_pipeline.flush_instance_map[@task_id] = self
|
332
|
+
end
|
323
333
|
|
334
|
+
# Define timeout and inactivity_timeout (if not defined by user)
|
335
|
+
if @current_pipeline.flush_instance_map[@task_id] == self
|
336
|
+
if @timeout.nil?
|
337
|
+
@timeout = @current_pipeline.default_timeout
|
338
|
+
end
|
339
|
+
if @inactivity_timeout.nil?
|
340
|
+
@inactivity_timeout = @timeout
|
341
|
+
end
|
342
|
+
end
|
343
|
+
|
344
|
+
end
|
324
345
|
|
325
346
|
# Remove the expired Aggregate maps from @current_pipeline.aggregate_maps if they are older than timeout or if no new event has been received since inactivity_timeout.
|
326
347
|
# If @push_previous_map_as_event option is set, or @push_map_as_event_on_timeout is set, expired maps are returned as new events to be flushed to Logstash pipeline.
|
@@ -334,7 +355,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
334
355
|
@logger.debug("Aggregate remove_expired_maps call with '#{@task_id}' pattern and #{@current_pipeline.aggregate_maps[@task_id].length} maps")
|
335
356
|
|
336
357
|
@current_pipeline.aggregate_maps[@task_id].delete_if do |key, element|
|
337
|
-
if element.creation_timestamp < min_timestamp || element.lastevent_timestamp < min_inactivity_timestamp
|
358
|
+
if element.creation_timestamp + element.difference_from_creation_to_now < min_timestamp || element.lastevent_timestamp + element.difference_from_creation_to_now < min_inactivity_timestamp
|
338
359
|
if @push_previous_map_as_event || @push_map_as_event_on_timeout
|
339
360
|
events_to_flush << create_timeout_event(element.map, key)
|
340
361
|
end
|
@@ -347,6 +368,33 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
347
368
|
return events_to_flush
|
348
369
|
end
|
349
370
|
|
371
|
+
# Remove the expired Aggregate map associated to task_id if it is older than timeout or if no new event has been received since inactivity_timeout (relative to current event timestamp).
|
372
|
+
# If @push_previous_map_as_event option is set, or @push_map_as_event_on_timeout is set, expired map is returned as new event to be flushed to Logstash pipeline.
|
373
|
+
def remove_expired_map_based_on_event_timestamp(task_id, event)
|
374
|
+
|
375
|
+
@logger.debug("Aggregate remove_expired_map_based_on_event_timestamp call with task_id : '#{@task_id}'")
|
376
|
+
|
377
|
+
# get aggregate map element
|
378
|
+
element = @current_pipeline.aggregate_maps[@task_id][task_id]
|
379
|
+
return nil if element.nil?
|
380
|
+
|
381
|
+
init_pipeline_timeout_management()
|
382
|
+
|
383
|
+
event_to_flush = nil
|
384
|
+
event_timestamp = reference_timestamp(event)
|
385
|
+
min_timestamp = event_timestamp - @timeout
|
386
|
+
min_inactivity_timestamp = event_timestamp - @inactivity_timeout
|
387
|
+
|
388
|
+
if element.creation_timestamp < min_timestamp || element.lastevent_timestamp < min_inactivity_timestamp
|
389
|
+
if @push_previous_map_as_event || @push_map_as_event_on_timeout
|
390
|
+
event_to_flush = create_timeout_event(element.map, task_id)
|
391
|
+
end
|
392
|
+
@current_pipeline.aggregate_maps[@task_id].delete(task_id)
|
393
|
+
end
|
394
|
+
|
395
|
+
return event_to_flush
|
396
|
+
end
|
397
|
+
|
350
398
|
# return if this filter instance has any timeout option enabled in logstash configuration
|
351
399
|
def has_timeout_options?()
|
352
400
|
return (
|
@@ -355,6 +403,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
355
403
|
timeout_code ||
|
356
404
|
push_map_as_event_on_timeout ||
|
357
405
|
push_previous_map_as_event ||
|
406
|
+
timeout_timestamp_field ||
|
358
407
|
timeout_task_id_field ||
|
359
408
|
!timeout_tags.empty?
|
360
409
|
)
|
@@ -368,11 +417,12 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
368
417
|
"timeout_code",
|
369
418
|
"push_map_as_event_on_timeout",
|
370
419
|
"push_previous_map_as_event",
|
420
|
+
"timeout_timestamp_field",
|
371
421
|
"timeout_task_id_field",
|
372
422
|
"timeout_tags"
|
373
423
|
].join(", ")
|
374
424
|
end
|
375
|
-
|
425
|
+
|
376
426
|
# return current pipeline id
|
377
427
|
def pipeline_id()
|
378
428
|
if @execution_context
|
@@ -382,45 +432,52 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
382
432
|
end
|
383
433
|
end
|
384
434
|
|
435
|
+
# compute and return "reference" timestamp to compute timeout :
|
436
|
+
# by default "system current time" or event timestamp if timeout_timestamp_field option is defined
|
437
|
+
def reference_timestamp(event)
|
438
|
+
return (@timeout_timestamp_field) ? event.get(@timeout_timestamp_field).time : Time.now
|
439
|
+
end
|
440
|
+
|
385
441
|
end # class LogStash::Filters::Aggregate
|
386
442
|
|
387
443
|
# Element of "aggregate_maps"
|
388
444
|
class LogStash::Filters::Aggregate::Element
|
389
445
|
|
390
|
-
attr_accessor :creation_timestamp, :lastevent_timestamp, :map
|
446
|
+
attr_accessor :creation_timestamp, :lastevent_timestamp, :difference_from_creation_to_now, :map
|
391
447
|
|
392
448
|
def initialize(creation_timestamp)
|
393
449
|
@creation_timestamp = creation_timestamp
|
394
450
|
@lastevent_timestamp = creation_timestamp
|
451
|
+
@difference_from_creation_to_now = (Time.now - creation_timestamp).to_i
|
395
452
|
@map = {}
|
396
453
|
end
|
397
454
|
end
|
398
455
|
|
399
456
|
# shared aggregate attributes for each pipeline
|
400
457
|
class LogStash::Filters::Aggregate::Pipeline
|
401
|
-
|
458
|
+
|
402
459
|
attr_accessor :aggregate_maps, :mutex, :default_timeout, :flush_instance_map, :last_flush_timestamp_map, :aggregate_maps_path_set, :pipeline_close_instance
|
403
460
|
|
404
461
|
def initialize()
|
405
462
|
# Stores all aggregate maps, per task_id pattern, then per task_id value
|
406
463
|
@aggregate_maps = {}
|
407
|
-
|
464
|
+
|
408
465
|
# Mutex used to synchronize access to 'aggregate_maps'
|
409
466
|
@mutex = Mutex.new
|
410
|
-
|
467
|
+
|
411
468
|
# Default timeout for task_id patterns where timeout is not defined in Logstash filter configuration
|
412
469
|
@default_timeout = nil
|
413
|
-
|
470
|
+
|
414
471
|
# For each "task_id" pattern, defines which Aggregate instance will process flush() call, processing expired Aggregate elements (older than timeout)
|
415
472
|
# For each entry, key is "task_id pattern" and value is "aggregate instance"
|
416
473
|
@flush_instance_map = {}
|
417
|
-
|
474
|
+
|
418
475
|
# last time where timeout management in flush() method was launched, per "task_id" pattern
|
419
476
|
@last_flush_timestamp_map = {}
|
420
|
-
|
477
|
+
|
421
478
|
# flag indicating if aggregate_maps_path option has been already set on one aggregate instance
|
422
479
|
@aggregate_maps_path_set = false
|
423
|
-
|
480
|
+
|
424
481
|
# defines which Aggregate instance will close Aggregate variables associated to current pipeline
|
425
482
|
@pipeline_close_instance = nil
|
426
483
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-filter-aggregate'
|
3
|
-
s.version = '2.
|
3
|
+
s.version = '2.8.0'
|
4
4
|
s.licenses = ['Apache License (2.0)']
|
5
5
|
s.summary = "Aggregates information from several events originating with a single task"
|
6
6
|
s.description = 'This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program'
|
@@ -163,6 +163,7 @@ describe LogStash::Filters::Aggregate do
|
|
163
163
|
describe "no timeout defined in none filter" do
|
164
164
|
it "defines a default timeout on a default filter" do
|
165
165
|
reset_timeout_management()
|
166
|
+
@end_filter.timeout = nil
|
166
167
|
expect(taskid_eviction_instance).to be_nil
|
167
168
|
@end_filter.flush()
|
168
169
|
expect(taskid_eviction_instance).to eq(@end_filter)
|
@@ -332,7 +333,10 @@ describe LogStash::Filters::Aggregate do
|
|
332
333
|
it "should push previous map as new event" do
|
333
334
|
push_filter = setup_filter({ "task_id" => "%{ppm_id}", "code" => "map['ppm_id'] = event.get('ppm_id')", "push_previous_map_as_event" => true, "timeout" => 5, "timeout_task_id_field" => "timeout_task_id_field" })
|
334
335
|
push_filter.filter(event({"ppm_id" => "1"})) { |yield_event| fail "task 1 shouldn't have yield event" }
|
335
|
-
push_filter.filter(event({"ppm_id" => "2"}))
|
336
|
+
push_filter.filter(event({"ppm_id" => "2"})) do |yield_event|
|
337
|
+
expect(yield_event.get("ppm_id")).to eq("1")
|
338
|
+
expect(yield_event.get("timeout_task_id_field")).to eq("1")
|
339
|
+
end
|
336
340
|
expect(aggregate_maps["%{ppm_id}"].size).to eq(1)
|
337
341
|
end
|
338
342
|
end
|
@@ -367,5 +371,22 @@ describe LogStash::Filters::Aggregate do
|
|
367
371
|
end
|
368
372
|
end
|
369
373
|
|
374
|
+
context "timeout_timestamp_field option is defined, " do
|
375
|
+
describe "when 3 old events arrive, " do
|
376
|
+
it "should push a new aggregated event using timeout based on events timestamp" do
|
377
|
+
agg_filter = setup_filter({ "task_id" => "%{ppm_id}", "code" => "map['sql_duration'] ||= 0; map['sql_duration'] += event.get('duration')", "timeout_timestamp_field" => "@timestamp", "push_map_as_event_on_timeout" => true, "timeout" => 120 })
|
378
|
+
agg_filter.filter(event({"ppm_id" => "1", "duration" => 2, "@timestamp" => timestamp("2018-01-31T00:00:00Z")})) { |yield_event| fail "it shouldn't have yield event" }
|
379
|
+
agg_filter.filter(event({"ppm_id" => "1", "duration" => 3, "@timestamp" => timestamp("2018-01-31T00:00:01Z")})) { |yield_event| fail "it shouldn't have yield event" }
|
380
|
+
events_to_flush = agg_filter.flush()
|
381
|
+
expect(events_to_flush).to be_empty
|
382
|
+
agg_filter.filter(event({"ppm_id" => "1", "duration" => 4, "@timestamp" => timestamp("2018-01-31T00:05:00Z")})) do |yield_event|
|
383
|
+
expect(yield_event).not_to be_nil
|
384
|
+
expect(yield_event.get("sql_duration")).to eq(5)
|
385
|
+
end
|
386
|
+
expect(aggregate_maps["%{ppm_id}"].size).to eq(1)
|
387
|
+
expect(aggregate_maps["%{ppm_id}"]["1"].map["sql_duration"]).to eq(4)
|
388
|
+
end
|
389
|
+
end
|
390
|
+
end
|
370
391
|
|
371
392
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-aggregate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2018-03-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|