logstash-filter-aggregate 2.5.2 → 2.6.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/CONTRIBUTORS +1 -0
- data/Gemfile +9 -0
- data/{NOTICE.txt → NOTICE.TXT} +0 -0
- data/README.md +65 -288
- data/docs/index.asciidoc +552 -0
- data/lib/logstash/filters/aggregate.rb +179 -90
- data/logstash-filter-aggregate.gemspec +2 -2
- data/spec/filters/aggregate_spec.rb +69 -13
- metadata +4 -3
@@ -5,16 +5,16 @@ require "logstash/namespace"
 require "thread"
 require "logstash/util/decorators"
 
-#
+#
 # The aim of this filter is to aggregate information available among several events (typically log lines) belonging to a same task,
 # and finally push aggregated information into final task event.
 #
-# You should be very careful to set Logstash filter workers to 1 (`-w 1` flag) for this filter to work correctly
+# You should be very careful to set Logstash filter workers to 1 (`-w 1` flag) for this filter to work correctly
 # otherwise events may be processed out of sequence and unexpected results will occur.
-#
+#
 # ==== Example #1
-#
-# * with these given logs :
+#
+# * with these given logs :
 # [source,ruby]
 # ----------------------------------
 # INFO - 12345 - TASK_START - start
@@ -22,7 +22,7 @@ require "logstash/util/decorators"
 # INFO - 12345 - SQL - sqlQuery2 - 34
 # INFO - 12345 - TASK_END - end
 # ----------------------------------
-#
+#
 # * you can aggregate "sql duration" for the whole task with this configuration :
 # [source,ruby]
 # ----------------------------------
@@ -30,7 +30,7 @@ require "logstash/util/decorators"
 #   grok {
 #     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
 #   }
-#
+#
 #   if [logger] == "TASK_START" {
 #     aggregate {
 #       task_id => "%{taskid}"
@@ -38,7 +38,7 @@ require "logstash/util/decorators"
 #       map_action => "create"
 #     }
 #   }
-#
+#
 #   if [logger] == "SQL" {
 #     aggregate {
 #       task_id => "%{taskid}"
@@ -46,7 +46,7 @@ require "logstash/util/decorators"
 #       map_action => "update"
 #     }
 #   }
-#
+#
 #   if [logger] == "TASK_END" {
 #     aggregate {
 #       task_id => "%{taskid}"
@@ -59,7 +59,7 @@ require "logstash/util/decorators"
 # }
 # ----------------------------------
 #
-# * the final event then looks like :
+# * the final event then looks like :
 # [source,ruby]
 # ----------------------------------
 # {
@@ -67,11 +67,11 @@ require "logstash/util/decorators"
 #     "sql_duration" => 46
 # }
 # ----------------------------------
-#
+#
 # the field `sql_duration` is added and contains the sum of all sql queries durations.
-#
+#
 # ==== Example #2 : no start event
-#
+#
 # * If you have the same logs than example #1, but without a start log :
 # [source,ruby]
 # ----------------------------------
@@ -79,22 +79,22 @@ require "logstash/util/decorators"
 # INFO - 12345 - SQL - sqlQuery2 - 34
 # INFO - 12345 - TASK_END - end
 # ----------------------------------
-#
-# * you can also aggregate "sql duration" with a slightly different configuration :
+#
+# * you can also aggregate "sql duration" with a slightly different configuration :
 # [source,ruby]
 # ----------------------------------
 # filter {
 #   grok {
 #     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
 #   }
-#
+#
 #   if [logger] == "SQL" {
 #     aggregate {
 #       task_id => "%{taskid}"
 #       code => "map['sql_duration'] ||= 0 ; map['sql_duration'] += event.get('duration')"
 #     }
 #   }
-#
+#
 #   if [logger] == "TASK_END" {
 #     aggregate {
 #       task_id => "%{taskid}"
@@ -112,15 +112,15 @@ require "logstash/util/decorators"
 #
 # ==== Example #3 : no end event
 #
-# Third use case: You have no specific end event.
+# Third use case: You have no specific end event.
 #
 # A typical case is aggregating or tracking user behaviour. We can track a user by its ID through the events, however once the user stops interacting, the events stop coming in. There is no specific event indicating the end of the user's interaction.
 #
-# In this case, we can enable the option 'push_map_as_event_on_timeout' to enable pushing the aggregation map as a new event when a timeout occurs.
+# In this case, we can enable the option 'push_map_as_event_on_timeout' to enable pushing the aggregation map as a new event when a timeout occurs.
 # In addition, we can enable 'timeout_code' to execute code on the populated timeout event.
-# We can also add 'timeout_task_id_field' so we can correlate the task_id, which in this case would be the user's ID.
+# We can also add 'timeout_task_id_field' so we can correlate the task_id, which in this case would be the user's ID.
 #
-# * Given these logs:
+# * Given these logs:
 #
 # [source,ruby]
 # ----------------------------------
@@ -130,7 +130,7 @@ require "logstash/util/decorators"
 # ----------------------------------
 #
 # * You can aggregate the amount of clicks the user did like this:
-#
+#
 # [source,ruby]
 # ----------------------------------
 # filter {
@@ -165,12 +165,12 @@ require "logstash/util/decorators"
 # ----------------------------------
 #
 # ==== Example #4 : no end event and tasks come one after the other
-#
-# Fourth use case : like example #3, you have no specific end event, but also, tasks come one after the other.
-# That is to say : tasks are not interlaced. All task1 events come, then all task2 events come, ...
-# In that case, you don't want to wait task timeout to flush aggregation map.
-# * A typical case is aggregating results from jdbc input plugin.
-# * Given that you have this SQL query : `SELECT country_name, town_name FROM town`
+#
+# Fourth use case : like example #3, you have no specific end event, but also, tasks come one after the other.
+# That is to say : tasks are not interlaced. All task1 events come, then all task2 events come, ...
+# In that case, you don't want to wait task timeout to flush aggregation map.
+# * A typical case is aggregating results from jdbc input plugin.
+# * Given that you have this SQL query : `SELECT country_name, town_name FROM town`
 # * Using jdbc input plugin, you get these 3 events from :
 # [source,json]
 # ----------------------------------
@@ -204,8 +204,68 @@ require "logstash/util/decorators"
 # * The key point is that each time aggregate plugin detects a new `country_name`, it pushes previous aggregate map as a new Logstash event, and then creates a new empty map for the next country
 # * When 5s timeout comes, the last aggregate map is pushed as a new event
 # * Finally, initial events (which are not aggregated) are dropped because useless (thanks to `event.cancel()`)
-#
-#
+#
+#
+# ==== Example #5 : no end event and push events as soon as possible
+#
+# Fifth use case: like example #3, there is no end event. Events keep comming for an indefinite time and you want to push the aggregation map as soon as possible after the last user interaction without waiting for the `timeout`. This allows to have the aggregated events pushed closer to real time.
+#
+# A typical case is aggregating or tracking user behaviour. We can track a user by its ID through the events, however once the user stops interacting, the events stop coming in. There is no specific event indicating the end of the user's interaction. The user ineraction will be considered as ended when no events for the specified user (task_id) arrive after the specified `inactivity_timeout`.
+#
+# If the user continues interacting for longer than `timeout` seconds (since first event), the aggregation map will still be deleted and pushed as a new event when timeout occurs.
+#
+# The difference with example #3 is that the events will be pushed as soon as the user stops interacting for `inactivity_timeout` seconds instead of waiting for the end of `timeout` seconds since first event.
+#
+# In this case, we can enable the option 'push_map_as_event_on_timeout' to enable pushing the aggregation map as a new event when inactivity timeout occurs.
+# In addition, we can enable 'timeout_code' to execute code on the populated timeout event.
+# We can also add 'timeout_task_id_field' so we can correlate the task_id, which in this case would be the user's ID.
+#
+# * Given these logs:
+#
+# [source,ruby]
+# ----------------------------------
+# INFO - 12345 - Clicked One
+# INFO - 12345 - Clicked Two
+# INFO - 12345 - Clicked Three
+# ----------------------------------
+#
+# * You can aggregate the amount of clicks the user did like this:
+#
+# [source,ruby]
+# ----------------------------------
+# filter {
+#   grok {
+#     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:user_id} - %{GREEDYDATA:msg_text}" ]
+#   }
+#
+#   aggregate {
+#     task_id => "%{user_id}"
+#     code => "map['clicks'] ||= 0; map['clicks'] += 1;"
+#     push_map_as_event_on_timeout => true
+#     timeout_task_id_field => "user_id"
+#     timeout => 3600 # 1 hour timeout, user activity will be considered finished one hour after the first event, even if events keep comming
+#     inactivity_timeout => 300 # 5 minutes timeout, user activity will be considered finished if no new events arrive 5 minutes after the last event
+#     timeout_tags => ['_aggregatetimeout']
+#     timeout_code => "event.set('several_clicks', event.get('clicks') > 1)"
+#   }
+# }
+# ----------------------------------
+#
+# * After five minutes of inactivity or one hour since first event, this will yield an event like:
+#
+# [source,json]
+# ----------------------------------
+# {
+#   "user_id": "12345",
+#   "clicks": 3,
+#   "several_clicks": true,
+#   "tags": [
+#      "_aggregatetimeout"
+#   ]
+# }
+# ----------------------------------
+#
+#
 # ==== How it works
 # * the filter needs a "task_id" to correlate events (log lines) of a same task
 # * at the task beggining, filter creates a map, attached to task_id
@@ -215,7 +275,7 @@ require "logstash/util/decorators"
 # * an aggregate map is tied to one task_id value which is tied to one task_id pattern. So if you have 2 filters with different task_id patterns, even if you have same task_id value, they won't share the same aggregate map.
 # * in one filter configuration, it is recommanded to define a timeout option to protect the feature against unterminated tasks. It tells the filter to delete expired maps
 # * if no timeout is defined, by default, all maps older than 1800 seconds are automatically deleted
-# * all timeout options have to be defined in only one aggregate filter per task_id pattern. Timeout options are : timeout, timeout_code, push_map_as_event_on_timeout, push_previous_map_as_event, timeout_task_id_field, timeout_tags
+# * all timeout options have to be defined in only one aggregate filter per task_id pattern. Timeout options are : timeout, inactivity_timeout, timeout_code, push_map_as_event_on_timeout, push_previous_map_as_event, timeout_task_id_field, timeout_tags
 # * if `code` execution raises an exception, the error is logged and event is tagged '_aggregateexception'
 #
 #
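The interplay between `timeout` and the new `inactivity_timeout` is the core of this release: a map now expires either `timeout` seconds after its first event or `inactivity_timeout` seconds after its last event, whichever comes first. As a mental model, here is a standalone Ruby sketch of that decision (illustrative timestamps and helper name, not code from the plugin):

[source,ruby]
----
require "time"

# A map is evicted when it is older than `timeout` (counted from map creation)
# or when no event arrived for `inactivity_timeout` (counted from the last event).
def expired?(creation_timestamp, lastevent_timestamp, now, timeout, inactivity_timeout)
  creation_timestamp < now - timeout || lastevent_timestamp < now - inactivity_timeout
end

timeout            = 3600  # 1 hour since the first event (as in Example #5)
inactivity_timeout = 300   # 5 minutes since the last event

created    = Time.parse("2017-01-01 10:00:00")
last_event = Time.parse("2017-01-01 10:02:00")

# 4 minutes after the last event: the task is still considered active
puts expired?(created, last_event, Time.parse("2017-01-01 10:06:00"), timeout, inactivity_timeout) # => false
# 5 and a half minutes after the last event: the inactivity timeout fires first
puts expired?(created, last_event, Time.parse("2017-01-01 10:07:30"), timeout, inactivity_timeout) # => true
----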
@@ -234,7 +294,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
   # ############## #
   # CONFIG OPTIONS #
   # ############## #
-
+
 
   config_name "aggregate"
 
@@ -275,13 +335,13 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
   # `"create_or_update"`: create the map if it wasn't created before, execute the code in all cases
   config :map_action, :validate => :string, :default => "create_or_update"
 
-  # Tell the filter that task is ended, and therefore, to delete aggregate map after code execution.
+  # Tell the filter that task is ended, and therefore, to delete aggregate map after code execution.
   config :end_of_task, :validate => :boolean, :default => false
 
   # The path to file where aggregate maps are stored when Logstash stops
   # and are loaded from when Logstash starts.
   #
-  # If not defined, aggregate maps will not be stored at Logstash stop and will be lost.
+  # If not defined, aggregate maps will not be stored at Logstash stop and will be lost.
   # Must be defined in only one aggregate filter (as aggregate maps are global).
   #
   # Example:
@@ -292,20 +352,35 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
   #   }
   # }
   config :aggregate_maps_path, :validate => :string, :required => false
-
-  # The amount of seconds after a task
+
+  # The amount of seconds (since the first event) after which a task is considered as expired.
   #
-  # When timeout occurs for a task,
+  # When timeout occurs for a task, its aggregate map is evicted.
+  #
+  # If 'push_map_as_event_on_timeout' or 'push_previous_map_as_event' is set to true, the task aggregation map is pushed as a new Logstash event.
   #
   # Timeout can be defined for each "task_id" pattern.
   #
   # If no timeout is defined, default timeout will be applied : 1800 seconds.
   config :timeout, :validate => :number, :required => false
 
-  # The
-  #
+  # The amount of seconds (since the last event) after which a task is considered as expired.
+  #
+  # When timeout occurs for a task, its aggregate map is evicted.
+  #
+  # If 'push_map_as_event_on_timeout' or 'push_previous_map_as_event' is set to true, the task aggregation map is pushed as a new Logstash event.
+  #
+  # `inactivity_timeout` can be defined for each "task_id" pattern.
+  #
+  # `inactivity_timeout` must be lower than `timeout`.
+  #
+  # If no `inactivity_timeout` is defined, no inactivity timeout will be applied (only timeout will be applied).
+  config :inactivity_timeout, :validate => :number, :required => false
+
+  # The code to execute to complete timeout generated event, when 'push_map_as_event_on_timeout' or 'push_previous_map_as_event' is set to true.
+  # The code block will have access to the newly generated timeout event that is pre-populated with the aggregation map.
   #
-  # If `'timeout_task_id_field'` is set, the event is also populated with the task_id value
+  # If `'timeout_task_id_field'` is set, the event is also populated with the task_id value
   #
   # Example:
   # [source,ruby]
@@ -316,19 +391,19 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
   # }
   config :timeout_code, :validate => :string, :required => false
 
-  # When this option is enabled, each time a task timeout is detected, it pushes task aggregation map as a new Logstash event.
+  # When this option is enabled, each time a task timeout is detected, it pushes task aggregation map as a new Logstash event.
   # This enables to detect and process task timeouts in Logstash, but also to manage tasks that have no explicit end event.
   config :push_map_as_event_on_timeout, :validate => :boolean, :required => false, :default => false
 
-  # When this option is enabled, each time aggregate plugin detects a new task id, it pushes previous aggregate map as a new Logstash event,
+  # When this option is enabled, each time aggregate plugin detects a new task id, it pushes previous aggregate map as a new Logstash event,
   # and then creates a new empty map for the next task.
   #
   # WARNING: this option works fine only if tasks come one after the other. It means : all task1 events, then all task2 events, etc...
   config :push_previous_map_as_event, :validate => :boolean, :required => false, :default => false
-
-  # This option indicates the timeout generated event's field for the "task_id" value.
+
+  # This option indicates the timeout generated event's field for the "task_id" value.
   # The task id will then be set into the timeout event. This can help correlate which tasks have been timed out.
-  #
+  #
   # For example, with option `timeout_task_id_field => "my_id"` ,when timeout task id is `"12345"`, the generated timeout event will contain `'my_id' => '12345'`.
   #
   # By default, if this option is not set, task id value won't be set into timeout generated event.
@@ -350,7 +425,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
   # STATIC VARIABLES #
   # ################ #
 
-
+
   # Default timeout (in seconds) when not defined in plugin configuration
   DEFAULT_TIMEOUT = 1800
 
@@ -376,24 +451,24 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
 
   # defines which Aggregate instance will close Aggregate static variables
   @@static_close_instance = nil
-
+
 
   # ####### #
   # METHODS #
   # ####### #
 
-
+
   # Initialize plugin
   public
   def register
-
+
     @logger.debug("Aggregate register call", :code => @code)
 
     # validate task_id option
     if !@task_id.match(/%\{.+\}/)
       raise LogStash::ConfigurationError, "Aggregate plugin: task_id pattern '#{@task_id}' must contain a dynamic expression like '%{field}'"
     end
-
+
     # process lambda expression to call in each filter call
     eval("@codeblock = lambda { |event, map| #{@code} }", binding, "(aggregate filter code)")
 
@@ -401,9 +476,9 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
     if @timeout_code
       eval("@timeout_codeblock = lambda { |event| #{@timeout_code} }", binding, "(aggregate filter timeout code)")
     end
-
+
     @@mutex.synchronize do
-
+
       # timeout management : define eviction_instance for current task_id pattern
       if has_timeout_options?
         if @@flush_instance_map.has_key?(@task_id)
@@ -414,12 +489,17 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
         @logger.debug("Aggregate timeout for '#{@task_id}' pattern: #{@timeout} seconds")
       end
 
-      # timeout management : define default_timeout
+      # timeout management : define default_timeout
       if !@timeout.nil? && (@@default_timeout.nil? || @timeout < @@default_timeout)
         @@default_timeout = @timeout
         @logger.debug("Aggregate default timeout: #{@timeout} seconds")
       end
 
+      # inactivity timeout management: make sure it is lower than timeout
+      if !@inactivity_timeout.nil? && ((!@timeout.nil? && @inactivity_timeout > @timeout) || (!@@default_timeout.nil? && @inactivity_timeout > @@default_timeout))
+        raise LogStash::ConfigurationError, "Aggregate plugin: For task_id pattern #{@task_id}, inactivity_timeout must be lower than timeout"
+      end
+
       # reinit static_close_instance (if necessary)
       if !@@aggregate_maps_path_set && !@@static_close_instance.nil?
         @@static_close_instance = nil
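The new guard turns a misconfiguration into a startup failure instead of silent misbehaviour. A condensed, runnable paraphrase of the check (plain Ruby outside the plugin class; `ArgumentError` stands in for `LogStash::ConfigurationError`):

[source,ruby]
----
timeout            = 120
inactivity_timeout = 300  # misconfigured: larger than timeout

# Same shape as the register-time check above, with plain local variables.
if !inactivity_timeout.nil? && !timeout.nil? && inactivity_timeout > timeout
  raise ArgumentError, "inactivity_timeout must be lower than timeout"
end
----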
@@ -435,14 +515,14 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
           @@static_close_instance = self
         end
       end
-
+
       # load aggregate maps from file (if option defined)
       if !@aggregate_maps_path.nil? && File.exist?(@aggregate_maps_path)
         File.open(@aggregate_maps_path, "r") { |from_file| @@aggregate_maps.merge!(Marshal.load(from_file)) }
         File.delete(@aggregate_maps_path)
         @logger.info("Aggregate maps loaded from : #{@aggregate_maps_path}")
       end
-
+
       # init aggregate_maps
       @@aggregate_maps[@task_id] ||= {}
     end
@@ -451,10 +531,10 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
   # Called when Logstash stops
   public
   def close
-
+
     @logger.debug("Aggregate close call", :code => @code)
 
-    # define static close instance if none is already defined
+    # define static close instance if none is already defined
     @@static_close_instance = self if @@static_close_instance.nil?
 
     if @@static_close_instance == self
@@ -467,16 +547,16 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
         end
         @@aggregate_maps.clear()
       end
-
+
       # reinit static variables for Logstash reload
       @@default_timeout = nil
       @@flush_instance_map = {}
       @@last_flush_timestamp_map = {}
       @@aggregate_maps_path_set = false
     end
-
+
   end
-
+
   # This method is invoked each time an event matches the filter
   public
   def filter(event)
@@ -490,10 +570,10 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
 
     # protect aggregate_maps against concurrent access, using a mutex
     @@mutex.synchronize do
-
+
       # retrieve the current aggregate map
       aggregate_maps_element = @@aggregate_maps[@task_id][task_id]
-
+
 
       # create aggregate map, if it doesn't exist
       if aggregate_maps_element.nil?
@@ -508,24 +588,25 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
         return if @map_action == "create"
       end
       map = aggregate_maps_element.map
-
+      # update last event timestamp
+      aggregate_maps_element.lastevent_timestamp = Time.now
       # execute the code to read/update map and event
       begin
         @codeblock.call(event, map)
         @logger.debug("Aggregate successful filter code execution", :code => @code)
         noError = true
       rescue => exception
-        @logger.error("Aggregate exception occurred",
+        @logger.error("Aggregate exception occurred",
                       :error => exception,
                       :code => @code,
                       :map => map,
                       :event_data => event.to_hash_with_metadata)
         event.tag("_aggregateexception")
       end
-
+
       # delete the map if task is ended
       @@aggregate_maps[@task_id].delete(task_id) if @end_of_task
-
+
     end
 
     # match the filter, only if no error occurred
@@ -544,13 +625,13 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
   def create_timeout_event(aggregation_map, task_id)
 
     @logger.debug("Aggregate create_timeout_event call with task_id '#{task_id}'")
-
-    event_to_yield = LogStash::Event.new(aggregation_map)
+
+    event_to_yield = LogStash::Event.new(aggregation_map)
 
     if @timeout_task_id_field
       event_to_yield.set(@timeout_task_id_field, task_id)
     end
-
+
     LogStash::Util::Decorators.add_tags(@timeout_tags, event_to_yield, "filters/#{self.class.name}")
 
     # Call code block if available
@@ -558,16 +639,16 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
       begin
         @timeout_codeblock.call(event_to_yield)
       rescue => exception
-        @logger.error("Aggregate exception occurred",
+        @logger.error("Aggregate exception occurred",
                       :error => exception,
                       :timeout_code => @timeout_code,
                       :timeout_event_data => event_to_yield.to_hash_with_metadata)
         event_to_yield.tag("_aggregateexception")
       end
     end
-
+
     return event_to_yield
-  end
+  end
 
   # Extract the previous map in aggregate maps, and return it as a new Logstash event
   def extract_previous_map_as_event
@@ -581,12 +662,12 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
   def periodic_flush
     true
   end
-
+
   # This method is invoked by LogStash every 5 seconds.
   def flush(options = {})
-
+
     @logger.debug("Aggregate flush call with #{options}")
-
+
     # Protection against no timeout defined by Logstash conf : define a default eviction instance with timeout = DEFAULT_TIMEOUT seconds
     if @@default_timeout.nil?
       @@default_timeout = DEFAULT_TIMEOUT
@@ -597,9 +678,13 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
     elsif @@flush_instance_map[@task_id].timeout.nil?
       @@flush_instance_map[@task_id].timeout = @@default_timeout
     end
-
-    # Launch timeout management only every interval of (@timeout / 2) seconds or at Logstash shutdown
-    if @@flush_instance_map[@task_id] == self && (!@@last_flush_timestamp_map.has_key?(@task_id) || Time.now > @@last_flush_timestamp_map[@task_id] + @timeout / 2 || options[:final])
+
+    if @@flush_instance_map[@task_id].inactivity_timeout.nil?
+      @@flush_instance_map[@task_id].inactivity_timeout = @@flush_instance_map[@task_id].timeout
+    end
+
+    # Launch timeout management only every interval of (@inactivity_timeout / 2) seconds or at Logstash shutdown
+    if @@flush_instance_map[@task_id] == self && (!@@last_flush_timestamp_map.has_key?(@task_id) || Time.now > @@last_flush_timestamp_map[@task_id] + @inactivity_timeout / 2 || options[:final])
       events_to_flush = remove_expired_maps()
 
       # at Logstash shutdown, if push_previous_map_as_event is enabled, it's important to force flush (particularly for jdbc input plugin)
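Logstash invokes `flush` every 5 seconds, so the condition above acts as a throttle: the expiry scan only runs every `inactivity_timeout / 2` seconds (rather than `timeout / 2` as before), or unconditionally on the final shutdown flush. A standalone sketch of that gating (hypothetical names, not plugin code):

[source,ruby]
----
inactivity_timeout = 300
last_flush = nil

# Returns true when the (comparatively costly) expiry scan should run.
should_scan = lambda do |now, final|
  if last_flush.nil? || now > last_flush + inactivity_timeout / 2 || final
    last_flush = now
    true
  else
    false
  end
end

t0 = Time.now
puts should_scan.call(t0, false)      # => true  (first call always scans)
puts should_scan.call(t0 + 5, false)  # => false (next 5-second tick is skipped)
puts should_scan.call(t0 + 10, true)  # => true  (final flush at shutdown forces a scan)
----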
@@ -611,10 +696,10 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
       if options[:final]
         events_to_flush.each { |event_to_flush| event_to_flush.tag("_aggregatefinalflush") }
       end
-
+
       # update last flush timestamp
       @@last_flush_timestamp_map[@task_id] = Time.now
-
+
       # return events to flush into Logstash pipeline
       return events_to_flush
     else
@@ -623,19 +708,20 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
 
   end
 
-
-  # Remove the expired Aggregate maps from @@aggregate_maps if they are older than timeout.
+
+  # Remove the expired Aggregate maps from @@aggregate_maps if they are older than timeout or if no new event has been received since inactivity_timeout.
   # If @push_previous_map_as_event option is set, or @push_map_as_event_on_timeout is set, expired maps are returned as new events to be flushed to Logstash pipeline.
   def remove_expired_maps()
     events_to_flush = []
     min_timestamp = Time.now - @timeout
-
+    min_inactivity_timestamp = Time.now - @inactivity_timeout
+
     @@mutex.synchronize do
 
       @logger.debug("Aggregate remove_expired_maps call with '#{@task_id}' pattern and #{@@aggregate_maps[@task_id].length} maps")
 
-      @@aggregate_maps[@task_id].delete_if do |key, element|
-        if element.creation_timestamp < min_timestamp
+      @@aggregate_maps[@task_id].delete_if do |key, element|
+        if element.creation_timestamp < min_timestamp || element.lastevent_timestamp < min_inactivity_timestamp
           if @push_previous_map_as_event || @push_map_as_event_on_timeout
             events_to_flush << create_timeout_event(element.map, key)
           end
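To see the two-deadline eviction in action outside the plugin, here is a runnable sketch over a plain Hash (the `Element` struct and timestamps are illustrative, not the plugin's own classes):

[source,ruby]
----
Element = Struct.new(:creation_timestamp, :lastevent_timestamp, :map)

now  = Time.now
maps = {
  "12345" => Element.new(now - 4000, now - 100, { "clicks" => 3 }), # older than timeout
  "67890" => Element.new(now - 600,  now - 400, { "clicks" => 1 }), # inactive too long
  "24680" => Element.new(now - 600,  now - 10,  { "clicks" => 7 }), # still active
}

timeout, inactivity_timeout = 3600, 300
min_timestamp            = now - timeout
min_inactivity_timestamp = now - inactivity_timeout

events_to_flush = []
maps.delete_if do |task_id, element|
  if element.creation_timestamp < min_timestamp || element.lastevent_timestamp < min_inactivity_timestamp
    events_to_flush << element.map.merge("task_id" => task_id)
    true
  else
    false
  end
end

p events_to_flush.size # => 2 (maps "12345" and "67890" are flushed)
p maps.keys            # => ["24680"]
----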
@@ -644,14 +730,15 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
         next false
       end
     end
-
+
     return events_to_flush
   end
-
-  # return if this filter instance has any timeout option enabled in
+
+  # return if this filter instance has any timeout option enabled in logstash configuration
   def has_timeout_options?()
     return (
       timeout ||
+      inactivity_timeout ||
       timeout_code ||
       push_map_as_event_on_timeout ||
       push_previous_map_as_event ||
@@ -664,6 +751,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
   def display_timeout_options()
     return [
       "timeout",
+      "inactivity_timeout",
       "timeout_code",
       "push_map_as_event_on_timeout",
       "push_previous_map_as_event",
@@ -677,10 +765,11 @@ end # class LogStash::Filters::Aggregate
 # Element of "aggregate_maps"
 class LogStash::Filters::Aggregate::Element
 
-  attr_accessor :creation_timestamp, :map
+  attr_accessor :creation_timestamp, :lastevent_timestamp, :map
 
   def initialize(creation_timestamp)
     @creation_timestamp = creation_timestamp
+    @lastevent_timestamp = creation_timestamp
     @map = {}
   end
-end
+end
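The new `lastevent_timestamp` attribute closes the loop: it starts equal to `creation_timestamp` and is refreshed in `filter` on every matching event (see the `aggregate_maps_element.lastevent_timestamp = Time.now` line earlier in the diff), so inactivity is always measured from the most recent event. A minimal illustration of that lifecycle:

[source,ruby]
----
class Element
  attr_accessor :creation_timestamp, :lastevent_timestamp, :map

  def initialize(creation_timestamp)
    @creation_timestamp  = creation_timestamp
    @lastevent_timestamp = creation_timestamp # equal until the next event arrives
    @map = {}
  end
end

element = Element.new(Time.now - 120)
element.lastevent_timestamp = Time.now # what filter() does for each new event
puts element.lastevent_timestamp > element.creation_timestamp # => true
----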