logstash-filter-aggregate 2.5.2 → 2.6.0

@@ -5,16 +5,16 @@ require "logstash/namespace"
  require "thread"
  require "logstash/util/decorators"

  #
  # The aim of this filter is to aggregate information available among several events (typically log lines) belonging to the same task,
  # and finally push aggregated information into the final task event.
  #
  # You should be very careful to set Logstash filter workers to 1 (`-w 1` flag) for this filter to work correctly,
  # otherwise events may be processed out of sequence and unexpected results will occur.
  #
  # ==== Example #1
  #
  # * with these given logs:
  # [source,ruby]
  # ----------------------------------
  # INFO - 12345 - TASK_START - start
@@ -22,7 +22,7 @@ require "logstash/util/decorators"
  # INFO - 12345 - SQL - sqlQuery2 - 34
  # INFO - 12345 - TASK_END - end
  # ----------------------------------
  #
  # * you can aggregate "sql duration" for the whole task with this configuration:
  # [source,ruby]
  # ----------------------------------
@@ -30,7 +30,7 @@ require "logstash/util/decorators"
  # grok {
  #   match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
  # }
  #
  # if [logger] == "TASK_START" {
  #   aggregate {
  #     task_id => "%{taskid}"
@@ -38,7 +38,7 @@ require "logstash/util/decorators"
  #     map_action => "create"
  #   }
  # }
  #
  # if [logger] == "SQL" {
  #   aggregate {
  #     task_id => "%{taskid}"
@@ -46,7 +46,7 @@ require "logstash/util/decorators"
  #     map_action => "update"
  #   }
  # }
  #
  # if [logger] == "TASK_END" {
  #   aggregate {
  #     task_id => "%{taskid}"
@@ -59,7 +59,7 @@ require "logstash/util/decorators"
  # }
  # ----------------------------------
  #
  # * the final event then looks like:
  # [source,ruby]
  # ----------------------------------
  # {
@@ -67,11 +67,11 @@ require "logstash/util/decorators"
  #   "sql_duration" => 46
  # }
  # ----------------------------------
  #
  # the field `sql_duration` is added and contains the sum of all SQL query durations.
  #
  # ==== Example #2 : no start event
  #
  # * If you have the same logs as example #1, but without a start log:
  # [source,ruby]
  # ----------------------------------
@@ -79,22 +79,22 @@ require "logstash/util/decorators"
  # INFO - 12345 - SQL - sqlQuery2 - 34
  # INFO - 12345 - TASK_END - end
  # ----------------------------------
  #
  # * you can also aggregate "sql duration" with a slightly different configuration:
  # [source,ruby]
  # ----------------------------------
  # filter {
  #   grok {
  #     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
  #   }
  #
  #   if [logger] == "SQL" {
  #     aggregate {
  #       task_id => "%{taskid}"
  #       code => "map['sql_duration'] ||= 0 ; map['sql_duration'] += event.get('duration')"
  #     }
  #   }
  #
  #   if [logger] == "TASK_END" {
  #     aggregate {
  #       task_id => "%{taskid}"
@@ -112,15 +112,15 @@ require "logstash/util/decorators"
  #
  # ==== Example #3 : no end event
  #
  # Third use case: You have no specific end event.
  #
  # A typical case is aggregating or tracking user behaviour. We can track a user by its ID through the events; however, once the user stops interacting, the events stop coming in. There is no specific event indicating the end of the user's interaction.
  #
  # In this case, we can enable the option 'push_map_as_event_on_timeout' to push the aggregation map as a new event when a timeout occurs.
  # In addition, we can use 'timeout_code' to execute code on the populated timeout event.
  # We can also add 'timeout_task_id_field' so we can correlate the task_id, which in this case would be the user's ID.
  #
  # * Given these logs:
  #
  # [source,ruby]
  # ----------------------------------
@@ -130,7 +130,7 @@ require "logstash/util/decorators"
  # ----------------------------------
  #
  # * You can aggregate the amount of clicks the user did like this:
  #
  # [source,ruby]
  # ----------------------------------
  # filter {
@@ -165,12 +165,12 @@ require "logstash/util/decorators"
  # ----------------------------------
  #
  # ==== Example #4 : no end event and tasks come one after the other
  #
  # Fourth use case: like example #3, you have no specific end event, but in addition, tasks come one after the other.
  # That is to say: tasks are not interlaced. All task1 events come, then all task2 events come, ...
  # In that case, you don't want to wait for the task timeout to flush the aggregation map.
  # * A typical case is aggregating results from the jdbc input plugin.
  # * Given that you have this SQL query: `SELECT country_name, town_name FROM town`
  # * Using the jdbc input plugin, you get these 3 events:
  # [source,json]
  # ----------------------------------
@@ -204,8 +204,68 @@ require "logstash/util/decorators"
  # * The key point is that each time the aggregate plugin detects a new `country_name`, it pushes the previous aggregate map as a new Logstash event, and then creates a new empty map for the next country
  # * When the 5s timeout occurs, the last aggregate map is pushed as a new event
  # * Finally, the initial events (which are not aggregated) are dropped because they are useless (thanks to `event.cancel()`), as in the sketch below
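
A sketch of the kind of configuration these bullets describe (the full Example #4 configuration is elided by this diff; field names follow the SQL query above, and the 5-second timeout matches the bullet points):

[source,ruby]
----------------------------------
filter {
  aggregate {
    task_id => "%{country_name}"
    code => "
      map['country_name'] = event.get('country_name')
      map['towns'] ||= []
      map['towns'] << { 'town_name' => event.get('town_name') }
      event.cancel() # drop the initial, non-aggregated event
    "
    push_previous_map_as_event => true
    timeout => 5 # flushes the last map shortly after the jdbc input finishes
  }
}
----------------------------------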
  #
  #
+ # ==== Example #5 : no end event and push events as soon as possible
+ #
+ # Fifth use case: like example #3, there is no end event. Events keep coming for an indefinite time, and you want to push the aggregation map as soon as possible after the last user interaction, without waiting for the `timeout`. This allows the aggregated events to be pushed closer to real time.
+ #
+ # A typical case is aggregating or tracking user behaviour. We can track a user by its ID through the events; however, once the user stops interacting, the events stop coming in. There is no specific event indicating the end of the user's interaction. The user interaction will be considered ended when no events for the specified user (task_id) arrive within the specified `inactivity_timeout`.
+ #
+ # If the user continues interacting for longer than `timeout` seconds (since the first event), the aggregation map will still be deleted and pushed as a new event when the timeout occurs.
+ #
+ # The difference with example #3 is that the events will be pushed as soon as the user stops interacting for `inactivity_timeout` seconds, instead of waiting for the end of `timeout` seconds since the first event.
+ #
+ # In this case, we can enable the option 'push_map_as_event_on_timeout' to push the aggregation map as a new event when the inactivity timeout occurs.
+ # In addition, we can use 'timeout_code' to execute code on the populated timeout event.
+ # We can also add 'timeout_task_id_field' so we can correlate the task_id, which in this case would be the user's ID.
+ #
+ # * Given these logs:
+ #
+ # [source,ruby]
+ # ----------------------------------
+ # INFO - 12345 - Clicked One
+ # INFO - 12345 - Clicked Two
+ # INFO - 12345 - Clicked Three
+ # ----------------------------------
+ #
+ # * You can aggregate the amount of clicks the user did like this:
+ #
+ # [source,ruby]
+ # ----------------------------------
+ # filter {
+ #   grok {
+ #     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:user_id} - %{GREEDYDATA:msg_text}" ]
+ #   }
+ #
+ #   aggregate {
+ #     task_id => "%{user_id}"
+ #     code => "map['clicks'] ||= 0; map['clicks'] += 1;"
+ #     push_map_as_event_on_timeout => true
+ #     timeout_task_id_field => "user_id"
+ #     timeout => 3600 # 1 hour timeout: user activity is considered finished one hour after the first event, even if events keep coming
+ #     inactivity_timeout => 300 # 5 minutes timeout: user activity is considered finished if no new events arrive within 5 minutes of the last event
+ #     timeout_tags => ['_aggregatetimeout']
+ #     timeout_code => "event.set('several_clicks', event.get('clicks') > 1)"
+ #   }
+ # }
+ # ----------------------------------
+ #
+ # * After five minutes of inactivity, or one hour after the first event, this will yield an event like:
+ #
+ # [source,json]
+ # ----------------------------------
+ # {
+ #   "user_id": "12345",
+ #   "clicks": 3,
+ #   "several_clicks": true,
+ #   "tags": [
+ #     "_aggregatetimeout"
+ #   ]
+ # }
+ # ----------------------------------
+ #
+ #
  # ==== How it works
  # * the filter needs a "task_id" to correlate events (log lines) of the same task
  # * at the task beginning, the filter creates a map, attached to the task_id
@@ -215,7 +275,7 @@ require "logstash/util/decorators"
  # * an aggregate map is tied to one task_id value, which is tied to one task_id pattern. So if you have 2 filters with different task_id patterns, even if you have the same task_id value, they won't share the same aggregate map.
  # * in one filter configuration, it is recommended to define a timeout option to protect the feature against unterminated tasks. It tells the filter to delete expired maps
  # * if no timeout is defined, by default, all maps older than 1800 seconds are automatically deleted
- # * all timeout options have to be defined in only one aggregate filter per task_id pattern. Timeout options are : timeout, timeout_code, push_map_as_event_on_timeout, push_previous_map_as_event, timeout_task_id_field, timeout_tags
+ # * all timeout options have to be defined in only one aggregate filter per task_id pattern (see the sketch below). Timeout options are: timeout, inactivity_timeout, timeout_code, push_map_as_event_on_timeout, push_previous_map_as_event, timeout_task_id_field, timeout_tags
  # * if `code` execution raises an exception, the error is logged and the event is tagged '_aggregateexception'
  #
  #
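
A sketch of the "all timeout options in only one filter per task_id pattern" rule (illustrative values; the `code` bodies are placeholders borrowed from Example #1):

[source,ruby]
----------------------------------
# this aggregate filter carries no timeout options...
aggregate {
  task_id => "%{taskid}"
  code => "map['sql_duration'] ||= 0"
}
# ...so ALL timeout options for the "%{taskid}" pattern live in this one
aggregate {
  task_id => "%{taskid}"
  code => "map['sql_duration'] += event.get('duration')"
  timeout => 120
  push_map_as_event_on_timeout => true
  timeout_tags => ['_aggregatetimeout']
}
----------------------------------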
@@ -234,7 +294,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
  # ############## #
  # CONFIG OPTIONS #
  # ############## #


  config_name "aggregate"

@@ -275,13 +335,13 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
  # `"create_or_update"`: create the map if it wasn't created before, execute the code in all cases
  config :map_action, :validate => :string, :default => "create_or_update"

  # Tell the filter that the task is ended and, therefore, to delete the aggregate map after code execution.
  config :end_of_task, :validate => :boolean, :default => false

  # The path to the file where aggregate maps are stored when Logstash stops
  # and are loaded from when Logstash starts.
  #
  # If not defined, aggregate maps will not be stored when Logstash stops and will be lost.
  # Must be defined in only one aggregate filter (as aggregate maps are global).
  #
  # Example:
@@ -292,20 +352,35 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
  # }
  # }
  config :aggregate_maps_path, :validate => :string, :required => false

- # The amount of seconds after a task "end event" can be considered lost.
+ # The amount of seconds (since the first event) after which a task is considered expired.
  #
- # When timeout occurs for a task, The task "map" is evicted.
+ # When timeout occurs for a task, its aggregate map is evicted.
+ #
+ # If 'push_map_as_event_on_timeout' or 'push_previous_map_as_event' is set to true, the task aggregation map is pushed as a new Logstash event.
  #
  # Timeout can be defined for each "task_id" pattern.
  #
  # If no timeout is defined, the default timeout will be applied: 1800 seconds.
  config :timeout, :validate => :number, :required => false

- # The code to execute to complete timeout generated event, when `'push_map_as_event_on_timeout'` or `'push_previous_map_as_event'` is set to true.
- # The code block will have access to the newly generated timeout event that is pre-populated with the aggregation map.
+ # The amount of seconds (since the last event) after which a task is considered expired.
+ #
+ # When timeout occurs for a task, its aggregate map is evicted.
+ #
+ # If 'push_map_as_event_on_timeout' or 'push_previous_map_as_event' is set to true, the task aggregation map is pushed as a new Logstash event.
+ #
+ # `inactivity_timeout` can be defined for each "task_id" pattern.
+ #
+ # `inactivity_timeout` must be lower than `timeout`.
+ #
+ # If no `inactivity_timeout` is defined, no inactivity timeout will be applied (only `timeout` will be applied).
+ config :inactivity_timeout, :validate => :number, :required => false
+
+ # The code to execute to complete the timeout generated event, when 'push_map_as_event_on_timeout' or 'push_previous_map_as_event' is set to true.
+ # The code block will have access to the newly generated timeout event that is pre-populated with the aggregation map.
  #
  # If `'timeout_task_id_field'` is set, the event is also populated with the task_id value.
  #
  # Example:
  # [source,ruby]
@@ -316,19 +391,19 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
  # }
  config :timeout_code, :validate => :string, :required => false

  # When this option is enabled, each time a task timeout is detected, it pushes the task aggregation map as a new Logstash event.
  # This makes it possible to detect and process task timeouts in Logstash, but also to manage tasks that have no explicit end event.
  config :push_map_as_event_on_timeout, :validate => :boolean, :required => false, :default => false

  # When this option is enabled, each time the aggregate plugin detects a new task id, it pushes the previous aggregate map as a new Logstash event,
  # and then creates a new empty map for the next task.
  #
  # WARNING: this option works fine only if tasks come one after the other. It means: all task1 events, then all task2 events, etc...
  config :push_previous_map_as_event, :validate => :boolean, :required => false, :default => false

  # This option indicates the timeout generated event's field for the "task_id" value.
  # The task id will then be set into the timeout event. This can help correlate which tasks have been timed out.
  #
  # For example, with option `timeout_task_id_field => "my_id"`, when the timeout task id is `"12345"`, the generated timeout event will contain `'my_id' => '12345'`.
  #
  # By default, if this option is not set, the task id value won't be set into the timeout generated event.
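
A compact sketch of how the timeout-related options above combine in a single filter (values illustrative, mirroring Example #5):

[source,ruby]
----------------------------------
aggregate {
  task_id => "%{user_id}"
  code => "map['clicks'] ||= 0; map['clicks'] += 1;"
  push_map_as_event_on_timeout => true
  timeout => 3600            # hard bound: flush at most 1 hour after the first event
  inactivity_timeout => 300  # soft bound: flush 5 minutes after the last event
  timeout_task_id_field => "user_id"
  timeout_tags => ['_aggregatetimeout']
  timeout_code => "event.set('several_clicks', event.get('clicks') > 1)"
}
----------------------------------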
@@ -350,7 +425,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
  # STATIC VARIABLES #
  # ################ #


  # Default timeout (in seconds) when not defined in plugin configuration
  DEFAULT_TIMEOUT = 1800

@@ -376,24 +451,24 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base

  # defines which Aggregate instance will close Aggregate static variables
  @@static_close_instance = nil


  # ####### #
  # METHODS #
  # ####### #


  # Initialize plugin
  public
  def register

    @logger.debug("Aggregate register call", :code => @code)

    # validate task_id option
    if !@task_id.match(/%\{.+\}/)
      raise LogStash::ConfigurationError, "Aggregate plugin: task_id pattern '#{@task_id}' must contain a dynamic expression like '%{field}'"
    end

    # process lambda expression to call in each filter call
    eval("@codeblock = lambda { |event, map| #{@code} }", binding, "(aggregate filter code)")

@@ -401,9 +476,9 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
    if @timeout_code
      eval("@timeout_codeblock = lambda { |event| #{@timeout_code} }", binding, "(aggregate filter timeout code)")
    end

    @@mutex.synchronize do

      # timeout management: define eviction_instance for current task_id pattern
      if has_timeout_options?
        if @@flush_instance_map.has_key?(@task_id)
@@ -414,12 +489,17 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
        @logger.debug("Aggregate timeout for '#{@task_id}' pattern: #{@timeout} seconds")
      end

      # timeout management: define default_timeout
      if !@timeout.nil? && (@@default_timeout.nil? || @timeout < @@default_timeout)
        @@default_timeout = @timeout
        @logger.debug("Aggregate default timeout: #{@timeout} seconds")
      end

+     # inactivity timeout management: make sure it is lower than timeout
+     if !@inactivity_timeout.nil? && ((!@timeout.nil? && @inactivity_timeout > @timeout) || (!@@default_timeout.nil? && @inactivity_timeout > @@default_timeout))
+       raise LogStash::ConfigurationError, "Aggregate plugin: For task_id pattern #{@task_id}, inactivity_timeout must be lower than timeout"
+     end
+
      # reinit static_close_instance (if necessary)
      if !@@aggregate_maps_path_set && !@@static_close_instance.nil?
        @@static_close_instance = nil
@@ -435,14 +515,14 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
          @@static_close_instance = self
        end
      end

      # load aggregate maps from file (if option defined)
      if !@aggregate_maps_path.nil? && File.exist?(@aggregate_maps_path)
        File.open(@aggregate_maps_path, "r") { |from_file| @@aggregate_maps.merge!(Marshal.load(from_file)) }
        File.delete(@aggregate_maps_path)
        @logger.info("Aggregate maps loaded from : #{@aggregate_maps_path}")
      end

      # init aggregate_maps
      @@aggregate_maps[@task_id] ||= {}
    end
@@ -451,10 +531,10 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
  # Called when Logstash stops
  public
  def close

    @logger.debug("Aggregate close call", :code => @code)

    # define static close instance if none is already defined
    @@static_close_instance = self if @@static_close_instance.nil?

    if @@static_close_instance == self
@@ -467,16 +547,16 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
        end
        @@aggregate_maps.clear()
      end

      # reinit static variables for Logstash reload
      @@default_timeout = nil
      @@flush_instance_map = {}
      @@last_flush_timestamp_map = {}
      @@aggregate_maps_path_set = false
    end

  end

  # This method is invoked each time an event matches the filter
  public
  def filter(event)
@@ -490,10 +570,10 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base

    # protect aggregate_maps against concurrent access, using a mutex
    @@mutex.synchronize do

      # retrieve the current aggregate map
      aggregate_maps_element = @@aggregate_maps[@task_id][task_id]

      # create aggregate map, if it doesn't exist
      if aggregate_maps_element.nil?
@@ -508,24 +588,25 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
        return if @map_action == "create"
      end
      map = aggregate_maps_element.map
+     # update last event timestamp
+     aggregate_maps_element.lastevent_timestamp = Time.now
      # execute the code to read/update map and event
      begin
        @codeblock.call(event, map)
        @logger.debug("Aggregate successful filter code execution", :code => @code)
        noError = true
      rescue => exception
        @logger.error("Aggregate exception occurred",
          :error => exception,
          :code => @code,
          :map => map,
          :event_data => event.to_hash_with_metadata)
        event.tag("_aggregateexception")
      end

      # delete the map if task is ended
      @@aggregate_maps[@task_id].delete(task_id) if @end_of_task

    end

    # match the filter, only if no error occurred
@@ -544,13 +625,13 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
  def create_timeout_event(aggregation_map, task_id)

    @logger.debug("Aggregate create_timeout_event call with task_id '#{task_id}'")

    event_to_yield = LogStash::Event.new(aggregation_map)

    if @timeout_task_id_field
      event_to_yield.set(@timeout_task_id_field, task_id)
    end

    LogStash::Util::Decorators.add_tags(@timeout_tags, event_to_yield, "filters/#{self.class.name}")

    # Call code block if available
@@ -558,16 +639,16 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
      begin
        @timeout_codeblock.call(event_to_yield)
      rescue => exception
        @logger.error("Aggregate exception occurred",
          :error => exception,
          :timeout_code => @timeout_code,
          :timeout_event_data => event_to_yield.to_hash_with_metadata)
        event_to_yield.tag("_aggregateexception")
      end
    end

    return event_to_yield
  end

  # Extract the previous map in aggregate maps, and return it as a new Logstash event
  def extract_previous_map_as_event
@@ -581,12 +662,12 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
  def periodic_flush
    true
  end

  # This method is invoked by Logstash every 5 seconds.
  def flush(options = {})

    @logger.debug("Aggregate flush call with #{options}")

    # Protection against no timeout defined by Logstash conf: define a default eviction instance with timeout = DEFAULT_TIMEOUT seconds
    if @@default_timeout.nil?
      @@default_timeout = DEFAULT_TIMEOUT
@@ -597,9 +678,13 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
    elsif @@flush_instance_map[@task_id].timeout.nil?
      @@flush_instance_map[@task_id].timeout = @@default_timeout
    end

-   # Launch timeout management only every interval of (@timeout / 2) seconds or at Logstash shutdown
-   if @@flush_instance_map[@task_id] == self && (!@@last_flush_timestamp_map.has_key?(@task_id) || Time.now > @@last_flush_timestamp_map[@task_id] + @timeout / 2 || options[:final])
+   if @@flush_instance_map[@task_id].inactivity_timeout.nil?
+     @@flush_instance_map[@task_id].inactivity_timeout = @@flush_instance_map[@task_id].timeout
+   end
+
+   # Launch timeout management only every interval of (@inactivity_timeout / 2) seconds or at Logstash shutdown
+   if @@flush_instance_map[@task_id] == self && (!@@last_flush_timestamp_map.has_key?(@task_id) || Time.now > @@last_flush_timestamp_map[@task_id] + @inactivity_timeout / 2 || options[:final])
      events_to_flush = remove_expired_maps()

      # at Logstash shutdown, if push_previous_map_as_event is enabled, it's important to force flush (particularly for jdbc input plugin)
@@ -611,10 +696,10 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
      if options[:final]
        events_to_flush.each { |event_to_flush| event_to_flush.tag("_aggregatefinalflush") }
      end

      # update last flush timestamp
      @@last_flush_timestamp_map[@task_id] = Time.now

      # return events to flush into Logstash pipeline
      return events_to_flush
    else
@@ -623,19 +708,20 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base

  end

- # Remove the expired Aggregate maps from @@aggregate_maps if they are older than timeout.
+ # Remove the expired Aggregate maps from @@aggregate_maps if they are older than timeout, or if no new event has been received since inactivity_timeout.
  # If @push_previous_map_as_event option is set, or @push_map_as_event_on_timeout is set, expired maps are returned as new events to be flushed to the Logstash pipeline.
  def remove_expired_maps()
    events_to_flush = []
    min_timestamp = Time.now - @timeout
+   min_inactivity_timestamp = Time.now - @inactivity_timeout

    @@mutex.synchronize do

      @logger.debug("Aggregate remove_expired_maps call with '#{@task_id}' pattern and #{@@aggregate_maps[@task_id].length} maps")

      @@aggregate_maps[@task_id].delete_if do |key, element|
-       if element.creation_timestamp < min_timestamp
+       if element.creation_timestamp < min_timestamp || element.lastevent_timestamp < min_inactivity_timestamp
          if @push_previous_map_as_event || @push_map_as_event_on_timeout
            events_to_flush << create_timeout_event(element.map, key)
          end
@@ -644,14 +730,15 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
        next false
      end
    end

    return events_to_flush
  end
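
To make the combined eviction rule concrete, here is a standalone worked example of the predicate used in `remove_expired_maps` above (times and option values are assumed for illustration):

[source,ruby]
----------------------------------
require "time"

timeout            = 3600  # hard bound, counted from map creation
inactivity_timeout = 300   # soft bound, counted from the last received event

creation_timestamp  = Time.parse("2017-01-01 10:00:00")
lastevent_timestamp = Time.parse("2017-01-01 10:20:00")
now                 = Time.parse("2017-01-01 10:25:01")

min_timestamp            = now - timeout
min_inactivity_timestamp = now - inactivity_timeout

# same test as remove_expired_maps: either bound expires the map
expired = creation_timestamp < min_timestamp ||
          lastevent_timestamp < min_inactivity_timestamp
puts expired # => true: the 5-minute inactivity bound was crossed at 10:25:00,
             # long before the hard timeout at 11:00:00
----------------------------------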

  # return if this filter instance has any timeout option enabled in Logstash configuration
  def has_timeout_options?()
    return (
      timeout ||
+     inactivity_timeout ||
      timeout_code ||
      push_map_as_event_on_timeout ||
      push_previous_map_as_event ||
@@ -664,6 +751,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
  def display_timeout_options()
    return [
      "timeout",
+     "inactivity_timeout",
      "timeout_code",
      "push_map_as_event_on_timeout",
      "push_previous_map_as_event",
@@ -677,10 +765,11 @@ end # class LogStash::Filters::Aggregate
  # Element of "aggregate_maps"
  class LogStash::Filters::Aggregate::Element

-   attr_accessor :creation_timestamp, :map
+   attr_accessor :creation_timestamp, :lastevent_timestamp, :map

    def initialize(creation_timestamp)
      @creation_timestamp = creation_timestamp
+     @lastevent_timestamp = creation_timestamp
      @map = {}
    end
  end