logstash-filter-aggregate 2.7.2 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -1
- data/LICENSE +1 -1
- data/docs/index.asciidoc +37 -6
- data/lib/logstash/filters/aggregate.rb +104 -47
- data/logstash-filter-aggregate.gemspec +1 -1
- data/spec/filters/aggregate_spec.rb +22 -1
- data/spec/filters/aggregate_spec_helper.rb +4 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8cbb27a58c0339ae9f7908d18250a5f91c456f6f
|
4
|
+
data.tar.gz: e86b38aa410918fb62cf7050be5fd8b9f44d6ed9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d14e1e14cdf0342db92e06ceac124f4c0d384183e4074f7da55b87e27d117d4feddc63844b3a7b06b196d704154a44005bae18e8b3f135d8008f877ef6bafe76
|
7
|
+
data.tar.gz: 090d1e070fab264a8f4b1c7150e6d4161e357ab146cc0303480cc6ee0aebdcba80659f9a7dba02ffadf0939f47140a7038f83aa571b9053e2cd9d1fb58a2b91f
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 2.8.0
|
2
|
+
- new feature: add 'timeout_timestamp_field' option.
|
3
|
+
When set, this option computes the timeout based on an event timestamp field (rather than system time). It's particularly useful when processing old logs.
|
4
|
+
|
1
5
|
## 2.7.2
|
2
6
|
- bugfix: fix synchronisation issue at Logstash shutdown (#75)
|
3
7
|
|
@@ -24,7 +28,8 @@
|
|
24
28
|
- docs: bump patch level for doc build
|
25
29
|
|
26
30
|
## 2.6.0
|
27
|
-
- new feature: 'inactivity_timeout'
|
31
|
+
- new feature: add 'inactivity_timeout' option.
|
32
|
+
Events for a given `task_id` will be aggregated for as long as they keep arriving within the defined `inactivity_timeout` option - the inactivity timeout is reset each time a new event happens. On the contrary, `timeout` is never reset and happens after `timeout` seconds since aggregation map creation.
|
28
33
|
|
29
34
|
## 2.5.2
|
30
35
|
- bugfix: fix 'aggregate_maps_path' load (issue #62). Re-start of Logstash died when no data were provided in 'aggregate_maps_path' file for some aggregate task_id patterns
|
data/LICENSE
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Copyright (c) 2012-
|
1
|
+
Copyright (c) 2012-2018 Elasticsearch <http://www.elasticsearch.org>
|
2
2
|
|
3
3
|
Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
you may not use this file except in compliance with the License.
|
data/docs/index.asciidoc
CHANGED
@@ -328,7 +328,7 @@ filter {
|
|
328
328
|
* an aggregate map is tied to one task_id value which is tied to one task_id pattern. So if you have 2 filters with different task_id patterns, even if you have same task_id value, they won't share the same aggregate map.
|
329
329
|
* in one filter configuration, it is recommended to define a timeout option to protect the feature against unterminated tasks. It tells the filter to delete expired maps
|
330
330
|
* if no timeout is defined, by default, all maps older than 1800 seconds are automatically deleted
|
331
|
-
* all timeout options have to be defined in only one aggregate filter per task_id pattern (per pipeline). Timeout options are : timeout, inactivity_timeout, timeout_code, push_map_as_event_on_timeout, push_previous_map_as_event, timeout_task_id_field, timeout_tags
|
331
|
+
* all timeout options have to be defined in only one aggregate filter per task_id pattern (per pipeline). Timeout options are : timeout, inactivity_timeout, timeout_code, push_map_as_event_on_timeout, push_previous_map_as_event, timeout_timestamp_field, timeout_task_id_field, timeout_tags
|
332
332
|
* if `code` execution raises an exception, the error is logged and event is tagged '_aggregateexception'
|
333
333
|
|
334
334
|
|
@@ -362,6 +362,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
362
362
|
| <<plugins-{type}s-{plugin}-timeout_code>> |<<string,string>>|No
|
363
363
|
| <<plugins-{type}s-{plugin}-timeout_tags>> |<<array,array>>|No
|
364
364
|
| <<plugins-{type}s-{plugin}-timeout_task_id_field>> |<<string,string>>|No
|
365
|
+
| <<plugins-{type}s-{plugin}-timeout_timestamp_field>> |<<string,string>>|No
|
365
366
|
|=======================================================================
|
366
367
|
|
367
368
|
Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
|
@@ -532,7 +533,7 @@ Example:
|
|
532
533
|
[source,ruby]
|
533
534
|
filter {
|
534
535
|
aggregate {
|
535
|
-
timeout_tags => ["aggregate_timeout
|
536
|
+
timeout_tags => ["aggregate_timeout"]
|
536
537
|
}
|
537
538
|
}
|
538
539
|
|
@@ -542,13 +543,43 @@ Example:
|
|
542
543
|
* Value type is <<string,string>>
|
543
544
|
* There is no default value for this setting.
|
544
545
|
|
545
|
-
This option indicates the timeout generated event's field
|
546
|
-
|
547
|
-
|
548
|
-
For example, with option `timeout_task_id_field => "my_id"` ,when timeout task id is `"12345"`, the generated timeout event will contain `'my_id' => '12345'`.
|
546
|
+
This option indicates the timeout generated event's field where the current "task_id" value will be set.
|
547
|
+
This can help to correlate which tasks have been timed out.
|
549
548
|
|
550
549
|
By default, if this option is not set, task id value won't be set into timeout generated event.
|
551
550
|
|
551
|
+
Example:
|
552
|
+
[source,ruby]
|
553
|
+
filter {
|
554
|
+
aggregate {
|
555
|
+
timeout_task_id_field => "task_id"
|
556
|
+
}
|
557
|
+
}
|
558
|
+
|
559
|
+
[id="plugins-{type}s-{plugin}-timeout_timestamp_field"]
|
560
|
+
===== `timeout_timestamp_field`
|
561
|
+
|
562
|
+
* Value type is <<string,string>>
|
563
|
+
* There is no default value for this setting.
|
564
|
+
|
565
|
+
By default, timeout is computed using system time, where Logstash is running.
|
566
|
+
|
567
|
+
When this option is set, the timeout is computed using the event timestamp field named by this option.
|
568
|
+
It means that when a first event arrives on aggregate filter and induces a map creation, map creation time will be equal to this event timestamp.
|
569
|
+
Then, each time a new event arrives on aggregate filter, event timestamp is compared to map creation time to check if timeout happened.
|
570
|
+
|
571
|
+
This option is particularly useful when processing old logs with option `push_map_as_event_on_timeout => true`.
|
572
|
+
It makes it possible to generate aggregated events based on timeout when processing old logs, where system time is inappropriate.
|
573
|
+
|
574
|
+
Warning: for this option to work correctly, it must be set on the first aggregate filter.
|
575
|
+
|
576
|
+
Example:
|
577
|
+
[source,ruby]
|
578
|
+
filter {
|
579
|
+
aggregate {
|
580
|
+
timeout_timestamp_field => "@timestamp"
|
581
|
+
}
|
582
|
+
}
|
552
583
|
|
553
584
|
|
554
585
|
[id="plugins-{type}s-{plugin}-common-options"]
|
@@ -36,6 +36,8 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
36
36
|
|
37
37
|
config :push_previous_map_as_event, :validate => :boolean, :required => false, :default => false
|
38
38
|
|
39
|
+
config :timeout_timestamp_field, :validate => :string, :required => false
|
40
|
+
|
39
41
|
config :timeout_task_id_field, :validate => :string, :required => false
|
40
42
|
|
41
43
|
config :timeout_tags, :validate => :array, :required => false, :default => []
|
@@ -44,7 +46,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
44
46
|
# ################## #
|
45
47
|
# INSTANCE VARIABLES #
|
46
48
|
# ################## #
|
47
|
-
|
49
|
+
|
48
50
|
|
49
51
|
# pointer to current pipeline context
|
50
52
|
attr_accessor :current_pipeline
|
@@ -57,7 +59,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
57
59
|
|
58
60
|
# Default timeout (in seconds) when not defined in plugin configuration
|
59
61
|
DEFAULT_TIMEOUT = 1800
|
60
|
-
|
62
|
+
|
61
63
|
# Store all shared aggregate attributes per pipeline id
|
62
64
|
@@pipelines = {}
|
63
65
|
|
@@ -77,7 +79,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
77
79
|
if !@task_id.match(/%\{.+\}/)
|
78
80
|
raise LogStash::ConfigurationError, "Aggregate plugin: task_id pattern '#{@task_id}' must contain a dynamic expression like '%{field}'"
|
79
81
|
end
|
80
|
-
|
82
|
+
|
81
83
|
# process lambda expression to call in each filter call
|
82
84
|
eval("@codeblock = lambda { |event, map| #{@code} }", binding, "(aggregate filter code)")
|
83
85
|
|
@@ -87,7 +89,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
87
89
|
end
|
88
90
|
|
89
91
|
# init pipeline context
|
90
|
-
@@pipelines[pipeline_id] ||= LogStash::Filters::Aggregate::Pipeline.new()
|
92
|
+
@@pipelines[pipeline_id] ||= LogStash::Filters::Aggregate::Pipeline.new()
|
91
93
|
@current_pipeline = @@pipelines[pipeline_id]
|
92
94
|
|
93
95
|
@current_pipeline.mutex.synchronize do
|
@@ -103,23 +105,23 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
103
105
|
end
|
104
106
|
|
105
107
|
# timeout management : define default_timeout
|
106
|
-
if
|
108
|
+
if @timeout && (@current_pipeline.default_timeout.nil? || @timeout < @current_pipeline.default_timeout)
|
107
109
|
@current_pipeline.default_timeout = @timeout
|
108
110
|
@logger.debug("Aggregate default timeout: #{@timeout} seconds")
|
109
111
|
end
|
110
112
|
|
111
113
|
# inactivity timeout management: make sure it is lower than timeout
|
112
|
-
if
|
114
|
+
if @inactivity_timeout && ((@timeout && @inactivity_timeout > @timeout) || (@current_pipeline.default_timeout && @inactivity_timeout > @current_pipeline.default_timeout))
|
113
115
|
raise LogStash::ConfigurationError, "Aggregate plugin: For task_id pattern #{@task_id}, inactivity_timeout must be lower than timeout"
|
114
116
|
end
|
115
117
|
|
116
118
|
# reinit pipeline_close_instance (if necessary)
|
117
|
-
if !@current_pipeline.aggregate_maps_path_set &&
|
119
|
+
if !@current_pipeline.aggregate_maps_path_set && @current_pipeline.pipeline_close_instance
|
118
120
|
@current_pipeline.pipeline_close_instance = nil
|
119
121
|
end
|
120
122
|
|
121
123
|
# check if aggregate_maps_path option has already been set on another instance else set @current_pipeline.aggregate_maps_path_set
|
122
|
-
if
|
124
|
+
if @aggregate_maps_path
|
123
125
|
if @current_pipeline.aggregate_maps_path_set
|
124
126
|
@current_pipeline.aggregate_maps_path_set = false
|
125
127
|
raise LogStash::ConfigurationError, "Aggregate plugin: Option 'aggregate_maps_path' must be set on only one aggregate filter"
|
@@ -130,7 +132,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
130
132
|
end
|
131
133
|
|
132
134
|
# load aggregate maps from file (if option defined)
|
133
|
-
if
|
135
|
+
if @aggregate_maps_path && File.exist?(@aggregate_maps_path)
|
134
136
|
File.open(@aggregate_maps_path, "r") { |from_file| @current_pipeline.aggregate_maps.merge!(Marshal.load(from_file)) }
|
135
137
|
File.delete(@aggregate_maps_path)
|
136
138
|
@logger.info("Aggregate maps loaded from : #{@aggregate_maps_path}")
|
@@ -138,8 +140,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
138
140
|
|
139
141
|
# init aggregate_maps
|
140
142
|
@current_pipeline.aggregate_maps[@task_id] ||= {}
|
141
|
-
|
142
|
-
|
143
|
+
|
143
144
|
end
|
144
145
|
end
|
145
146
|
|
@@ -156,7 +157,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
156
157
|
# store aggregate maps to file (if option defined)
|
157
158
|
@current_pipeline.mutex.synchronize do
|
158
159
|
@current_pipeline.aggregate_maps.delete_if { |key, value| value.empty? }
|
159
|
-
if
|
160
|
+
if @aggregate_maps_path && !@current_pipeline.aggregate_maps.empty?
|
160
161
|
File.open(@aggregate_maps_path, "w"){ |to_file| Marshal.dump(@current_pipeline.aggregate_maps, to_file) }
|
161
162
|
@logger.info("Aggregate maps stored to : #{@aggregate_maps_path}")
|
162
163
|
end
|
@@ -182,26 +183,36 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
182
183
|
# protect aggregate_maps against concurrent access, using a mutex
|
183
184
|
@current_pipeline.mutex.synchronize do
|
184
185
|
|
186
|
+
# if timeout is based on event timestamp, check if task_id map is expired and should be removed
|
187
|
+
if @timeout_timestamp_field
|
188
|
+
event_to_yield = remove_expired_map_based_on_event_timestamp(task_id, event)
|
189
|
+
end
|
190
|
+
|
185
191
|
# retrieve the current aggregate map
|
186
192
|
aggregate_maps_element = @current_pipeline.aggregate_maps[@task_id][task_id]
|
187
193
|
|
188
|
-
|
189
|
-
# create aggregate map, if it doesn't exist
|
194
|
+
# case where aggregate map isn't already created
|
190
195
|
if aggregate_maps_element.nil?
|
191
196
|
return if @map_action == "update"
|
197
|
+
|
192
198
|
# create new event from previous map, if @push_previous_map_as_event is enabled
|
193
199
|
if @push_previous_map_as_event && !@current_pipeline.aggregate_maps[@task_id].empty?
|
194
200
|
event_to_yield = extract_previous_map_as_event()
|
195
201
|
end
|
196
|
-
|
202
|
+
|
203
|
+
# create aggregate map
|
204
|
+
creation_timestamp = reference_timestamp(event)
|
205
|
+
aggregate_maps_element = LogStash::Filters::Aggregate::Element.new(creation_timestamp)
|
197
206
|
@current_pipeline.aggregate_maps[@task_id][task_id] = aggregate_maps_element
|
198
207
|
else
|
199
208
|
return if @map_action == "create"
|
200
209
|
end
|
201
|
-
|
210
|
+
|
202
211
|
# update last event timestamp
|
203
|
-
aggregate_maps_element.lastevent_timestamp =
|
212
|
+
aggregate_maps_element.lastevent_timestamp = reference_timestamp(event)
|
213
|
+
|
204
214
|
# execute the code to read/update map and event
|
215
|
+
map = aggregate_maps_element.map
|
205
216
|
begin
|
206
217
|
@codeblock.call(event, map)
|
207
218
|
@logger.debug("Aggregate successful filter code execution", :code => @code)
|
@@ -224,8 +235,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
224
235
|
filter_matched(event) if noError
|
225
236
|
|
226
237
|
# yield previous map as new event if set
|
227
|
-
yield event_to_yield
|
228
|
-
|
238
|
+
yield event_to_yield if event_to_yield
|
229
239
|
end
|
230
240
|
|
231
241
|
# Create a new event from the aggregation_map and the corresponding task_id
|
@@ -279,23 +289,11 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
279
289
|
|
280
290
|
@logger.debug("Aggregate flush call with #{options}")
|
281
291
|
|
282
|
-
#
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
if
|
287
|
-
@current_pipeline.flush_instance_map[@task_id] = self
|
288
|
-
@timeout = @current_pipeline.default_timeout
|
289
|
-
elsif @current_pipeline.flush_instance_map[@task_id].timeout.nil?
|
290
|
-
@current_pipeline.flush_instance_map[@task_id].timeout = @current_pipeline.default_timeout
|
291
|
-
end
|
292
|
-
|
293
|
-
if @current_pipeline.flush_instance_map[@task_id].inactivity_timeout.nil?
|
294
|
-
@current_pipeline.flush_instance_map[@task_id].inactivity_timeout = @current_pipeline.flush_instance_map[@task_id].timeout
|
295
|
-
end
|
296
|
-
|
297
|
-
# Launch timeout management only every interval of (@inactivity_timeout / 2) seconds or at Logstash shutdown
|
298
|
-
if @current_pipeline.flush_instance_map[@task_id] == self && !@current_pipeline.aggregate_maps[@task_id].nil? && (!@current_pipeline.last_flush_timestamp_map.has_key?(@task_id) || Time.now > @current_pipeline.last_flush_timestamp_map[@task_id] + @inactivity_timeout / 2 || options[:final])
|
292
|
+
# init flush/timeout properties for current pipeline
|
293
|
+
init_pipeline_timeout_management()
|
294
|
+
|
295
|
+
# launch timeout management only every interval of (@inactivity_timeout / 2) seconds or at Logstash shutdown
|
296
|
+
if @current_pipeline.flush_instance_map[@task_id] == self && @current_pipeline.aggregate_maps[@task_id] && (!@current_pipeline.last_flush_timestamp_map.has_key?(@task_id) || Time.now > @current_pipeline.last_flush_timestamp_map[@task_id] + @inactivity_timeout / 2 || options[:final])
|
299
297
|
events_to_flush = remove_expired_maps()
|
300
298
|
|
301
299
|
# at Logstash shutdown, if push_previous_map_as_event is enabled, it's important to force flush (particularly for jdbc input plugin)
|
@@ -318,9 +316,32 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
318
316
|
else
|
319
317
|
return []
|
320
318
|
end
|
321
|
-
|
322
319
|
end
|
320
|
+
|
321
|
+
# init flush/timeout properties for current pipeline
|
322
|
+
def init_pipeline_timeout_management()
|
323
|
+
|
324
|
+
# Define default timeout (if not defined by user)
|
325
|
+
if @current_pipeline.default_timeout.nil?
|
326
|
+
@current_pipeline.default_timeout = DEFAULT_TIMEOUT
|
327
|
+
end
|
328
|
+
|
329
|
+
# Define default flush instance that manages timeout (if not defined by user)
|
330
|
+
if !@current_pipeline.flush_instance_map.has_key?(@task_id)
|
331
|
+
@current_pipeline.flush_instance_map[@task_id] = self
|
332
|
+
end
|
323
333
|
|
334
|
+
# Define timeout and inactivity_timeout (if not defined by user)
|
335
|
+
if @current_pipeline.flush_instance_map[@task_id] == self
|
336
|
+
if @timeout.nil?
|
337
|
+
@timeout = @current_pipeline.default_timeout
|
338
|
+
end
|
339
|
+
if @inactivity_timeout.nil?
|
340
|
+
@inactivity_timeout = @timeout
|
341
|
+
end
|
342
|
+
end
|
343
|
+
|
344
|
+
end
|
324
345
|
|
325
346
|
# Remove the expired Aggregate maps from @current_pipeline.aggregate_maps if they are older than timeout or if no new event has been received since inactivity_timeout.
|
326
347
|
# If @push_previous_map_as_event option is set, or @push_map_as_event_on_timeout is set, expired maps are returned as new events to be flushed to Logstash pipeline.
|
@@ -334,7 +355,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
334
355
|
@logger.debug("Aggregate remove_expired_maps call with '#{@task_id}' pattern and #{@current_pipeline.aggregate_maps[@task_id].length} maps")
|
335
356
|
|
336
357
|
@current_pipeline.aggregate_maps[@task_id].delete_if do |key, element|
|
337
|
-
if element.creation_timestamp < min_timestamp || element.lastevent_timestamp < min_inactivity_timestamp
|
358
|
+
if element.creation_timestamp + element.difference_from_creation_to_now < min_timestamp || element.lastevent_timestamp + element.difference_from_creation_to_now < min_inactivity_timestamp
|
338
359
|
if @push_previous_map_as_event || @push_map_as_event_on_timeout
|
339
360
|
events_to_flush << create_timeout_event(element.map, key)
|
340
361
|
end
|
@@ -347,6 +368,33 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
347
368
|
return events_to_flush
|
348
369
|
end
|
349
370
|
|
371
|
+
# Remove the expired Aggregate map associated to task_id if it is older than timeout or if no new event has been received since inactivity_timeout (relative to current event timestamp).
|
372
|
+
# If @push_previous_map_as_event option is set, or @push_map_as_event_on_timeout is set, expired map is returned as new event to be flushed to Logstash pipeline.
|
373
|
+
def remove_expired_map_based_on_event_timestamp(task_id, event)
|
374
|
+
|
375
|
+
@logger.debug("Aggregate remove_expired_map_based_on_event_timestamp call with task_id : '#{@task_id}'")
|
376
|
+
|
377
|
+
# get aggregate map element
|
378
|
+
element = @current_pipeline.aggregate_maps[@task_id][task_id]
|
379
|
+
return nil if element.nil?
|
380
|
+
|
381
|
+
init_pipeline_timeout_management()
|
382
|
+
|
383
|
+
event_to_flush = nil
|
384
|
+
event_timestamp = reference_timestamp(event)
|
385
|
+
min_timestamp = event_timestamp - @timeout
|
386
|
+
min_inactivity_timestamp = event_timestamp - @inactivity_timeout
|
387
|
+
|
388
|
+
if element.creation_timestamp < min_timestamp || element.lastevent_timestamp < min_inactivity_timestamp
|
389
|
+
if @push_previous_map_as_event || @push_map_as_event_on_timeout
|
390
|
+
event_to_flush = create_timeout_event(element.map, task_id)
|
391
|
+
end
|
392
|
+
@current_pipeline.aggregate_maps[@task_id].delete(task_id)
|
393
|
+
end
|
394
|
+
|
395
|
+
return event_to_flush
|
396
|
+
end
|
397
|
+
|
350
398
|
# return if this filter instance has any timeout option enabled in logstash configuration
|
351
399
|
def has_timeout_options?()
|
352
400
|
return (
|
@@ -355,6 +403,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
355
403
|
timeout_code ||
|
356
404
|
push_map_as_event_on_timeout ||
|
357
405
|
push_previous_map_as_event ||
|
406
|
+
timeout_timestamp_field ||
|
358
407
|
timeout_task_id_field ||
|
359
408
|
!timeout_tags.empty?
|
360
409
|
)
|
@@ -368,11 +417,12 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
368
417
|
"timeout_code",
|
369
418
|
"push_map_as_event_on_timeout",
|
370
419
|
"push_previous_map_as_event",
|
420
|
+
"timeout_timestamp_field",
|
371
421
|
"timeout_task_id_field",
|
372
422
|
"timeout_tags"
|
373
423
|
].join(", ")
|
374
424
|
end
|
375
|
-
|
425
|
+
|
376
426
|
# return current pipeline id
|
377
427
|
def pipeline_id()
|
378
428
|
if @execution_context
|
@@ -382,45 +432,52 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
|
|
382
432
|
end
|
383
433
|
end
|
384
434
|
|
435
|
+
# compute and return "reference" timestamp to compute timeout :
|
436
|
+
# by default "system current time" or event timestamp if timeout_timestamp_field option is defined
|
437
|
+
def reference_timestamp(event)
|
438
|
+
return (@timeout_timestamp_field) ? event.get(@timeout_timestamp_field).time : Time.now
|
439
|
+
end
|
440
|
+
|
385
441
|
end # class LogStash::Filters::Aggregate
|
386
442
|
|
387
443
|
# Element of "aggregate_maps"
|
388
444
|
class LogStash::Filters::Aggregate::Element
|
389
445
|
|
390
|
-
attr_accessor :creation_timestamp, :lastevent_timestamp, :map
|
446
|
+
attr_accessor :creation_timestamp, :lastevent_timestamp, :difference_from_creation_to_now, :map
|
391
447
|
|
392
448
|
def initialize(creation_timestamp)
|
393
449
|
@creation_timestamp = creation_timestamp
|
394
450
|
@lastevent_timestamp = creation_timestamp
|
451
|
+
@difference_from_creation_to_now = (Time.now - creation_timestamp).to_i
|
395
452
|
@map = {}
|
396
453
|
end
|
397
454
|
end
|
398
455
|
|
399
456
|
# shared aggregate attributes for each pipeline
|
400
457
|
class LogStash::Filters::Aggregate::Pipeline
|
401
|
-
|
458
|
+
|
402
459
|
attr_accessor :aggregate_maps, :mutex, :default_timeout, :flush_instance_map, :last_flush_timestamp_map, :aggregate_maps_path_set, :pipeline_close_instance
|
403
460
|
|
404
461
|
def initialize()
|
405
462
|
# Stores all aggregate maps, per task_id pattern, then per task_id value
|
406
463
|
@aggregate_maps = {}
|
407
|
-
|
464
|
+
|
408
465
|
# Mutex used to synchronize access to 'aggregate_maps'
|
409
466
|
@mutex = Mutex.new
|
410
|
-
|
467
|
+
|
411
468
|
# Default timeout for task_id patterns where timeout is not defined in Logstash filter configuration
|
412
469
|
@default_timeout = nil
|
413
|
-
|
470
|
+
|
414
471
|
# For each "task_id" pattern, defines which Aggregate instance will process flush() call, processing expired Aggregate elements (older than timeout)
|
415
472
|
# For each entry, key is "task_id pattern" and value is "aggregate instance"
|
416
473
|
@flush_instance_map = {}
|
417
|
-
|
474
|
+
|
418
475
|
# last time where timeout management in flush() method was launched, per "task_id" pattern
|
419
476
|
@last_flush_timestamp_map = {}
|
420
|
-
|
477
|
+
|
421
478
|
# flag indicating if aggregate_maps_path option has been already set on one aggregate instance
|
422
479
|
@aggregate_maps_path_set = false
|
423
|
-
|
480
|
+
|
424
481
|
# defines which Aggregate instance will close Aggregate variables associated to current pipeline
|
425
482
|
@pipeline_close_instance = nil
|
426
483
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-filter-aggregate'
|
3
|
-
s.version = '2.
|
3
|
+
s.version = '2.8.0'
|
4
4
|
s.licenses = ['Apache License (2.0)']
|
5
5
|
s.summary = "Aggregates information from several events originating with a single task"
|
6
6
|
s.description = 'This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program'
|
@@ -163,6 +163,7 @@ describe LogStash::Filters::Aggregate do
|
|
163
163
|
describe "no timeout defined in none filter" do
|
164
164
|
it "defines a default timeout on a default filter" do
|
165
165
|
reset_timeout_management()
|
166
|
+
@end_filter.timeout = nil
|
166
167
|
expect(taskid_eviction_instance).to be_nil
|
167
168
|
@end_filter.flush()
|
168
169
|
expect(taskid_eviction_instance).to eq(@end_filter)
|
@@ -332,7 +333,10 @@ describe LogStash::Filters::Aggregate do
|
|
332
333
|
it "should push previous map as new event" do
|
333
334
|
push_filter = setup_filter({ "task_id" => "%{ppm_id}", "code" => "map['ppm_id'] = event.get('ppm_id')", "push_previous_map_as_event" => true, "timeout" => 5, "timeout_task_id_field" => "timeout_task_id_field" })
|
334
335
|
push_filter.filter(event({"ppm_id" => "1"})) { |yield_event| fail "task 1 shouldn't have yield event" }
|
335
|
-
push_filter.filter(event({"ppm_id" => "2"}))
|
336
|
+
push_filter.filter(event({"ppm_id" => "2"})) do |yield_event|
|
337
|
+
expect(yield_event.get("ppm_id")).to eq("1")
|
338
|
+
expect(yield_event.get("timeout_task_id_field")).to eq("1")
|
339
|
+
end
|
336
340
|
expect(aggregate_maps["%{ppm_id}"].size).to eq(1)
|
337
341
|
end
|
338
342
|
end
|
@@ -367,5 +371,22 @@ describe LogStash::Filters::Aggregate do
|
|
367
371
|
end
|
368
372
|
end
|
369
373
|
|
374
|
+
context "timeout_timestamp_field option is defined, " do
|
375
|
+
describe "when 3 old events arrive, " do
|
376
|
+
it "should push a new aggregated event using timeout based on events timestamp" do
|
377
|
+
agg_filter = setup_filter({ "task_id" => "%{ppm_id}", "code" => "map['sql_duration'] ||= 0; map['sql_duration'] += event.get('duration')", "timeout_timestamp_field" => "@timestamp", "push_map_as_event_on_timeout" => true, "timeout" => 120 })
|
378
|
+
agg_filter.filter(event({"ppm_id" => "1", "duration" => 2, "@timestamp" => timestamp("2018-01-31T00:00:00Z")})) { |yield_event| fail "it shouldn't have yield event" }
|
379
|
+
agg_filter.filter(event({"ppm_id" => "1", "duration" => 3, "@timestamp" => timestamp("2018-01-31T00:00:01Z")})) { |yield_event| fail "it shouldn't have yield event" }
|
380
|
+
events_to_flush = agg_filter.flush()
|
381
|
+
expect(events_to_flush).to be_empty
|
382
|
+
agg_filter.filter(event({"ppm_id" => "1", "duration" => 4, "@timestamp" => timestamp("2018-01-31T00:05:00Z")})) do |yield_event|
|
383
|
+
expect(yield_event).not_to be_nil
|
384
|
+
expect(yield_event.get("sql_duration")).to eq(5)
|
385
|
+
end
|
386
|
+
expect(aggregate_maps["%{ppm_id}"].size).to eq(1)
|
387
|
+
expect(aggregate_maps["%{ppm_id}"]["1"].map["sql_duration"]).to eq(4)
|
388
|
+
end
|
389
|
+
end
|
390
|
+
end
|
370
391
|
|
371
392
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-aggregate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2018-03-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|