logstash-filter-aggregate 2.5.0 → 2.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b8769a07ab85fdc52359b1e08838f8f308f96e61
4
- data.tar.gz: 0e2c0b0c078b789c87c57678683ac2ce72919fa5
3
+ metadata.gz: a9d474bc096fd4b164adb655607abdc0d52b7f15
4
+ data.tar.gz: 9e7b57528e3201bea76ed198e080be776b0ed3af
5
5
  SHA512:
6
- metadata.gz: 7baa1843fac215316d44e8d029aa840b9fe295f7bb953e2bb51873665da473d70f54551b23960491256a542687732d20e482a3efd98fbd8124012c4cea5c3e84
7
- data.tar.gz: b073f65ebad9629bdda9893f7e08fe630c8f48643bb4575001ed2430e46fbf6f13a218b09e2df9585788d519a5acf2fcb5b36c0e7e67ed8c45245c61dbdab523
6
+ metadata.gz: 6d08a02cdf7a32904e74f235f2f3888170cd535dce21aeb5767c6ddfa302920beb2a6c4216aac1b485ccb502a0f0180d687b5276e7e18ec691e4c8ae2c5895e2
7
+ data.tar.gz: 5d509ab8cf7d26fbce5cf4f53d8255ff7c203492c9fe96aab363afa73dedcda278e2497a7be91e7f8f7ce58f08bb42cfd93f08afd94628c4d54b6cd671dfeee9
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 2.5.1
2
+ - enhancement: when final flush occurs (just before Logstash shutdown), add `_aggregatefinalflush` tag on generated timeout events
3
+ - bugfix: when final flush occurs (just before Logstash shutdown), push last aggregate map as event (if push_previous_map_as_event=true)
4
+ - bugfix: fix 'timeout_task_id_field' feature when push_previous_map_as_event=true
5
+ - bugfix: fix aggregate_maps_path feature (bug since v2.4.0)
6
+ - internal: add debug logging
7
+ - internal: refactor flush management static variables
8
+
1
9
  ## 2.5.0
2
10
  - new feature: add compatibility with Logstash 5
3
11
  - breaking: need Logstash 2.4 or later
data/README.md CHANGED
@@ -179,7 +179,7 @@ In that case, you don't want to wait task timeout to flush aggregation map.
179
179
  map['town_name'] ||= []
180
180
  event.to_hash.each do |key,value|
181
181
  map[key] = value unless map.has_key?(key)
182
- map[key] << value if map[key].is_a?(Array)
182
+ map[key] << value if map[key].is_a?(Array) and !value.is_a?(Array)
183
183
  end
184
184
  "
185
185
  push_previous_map_as_event => true
@@ -194,7 +194,7 @@ require "logstash/util/decorators"
194
194
  # map['town_name'] ||= []
195
195
  # event.to_hash.each do |key,value|
196
196
  # map[key] = value unless map.has_key?(key)
197
- # map[key] << value if map[key].is_a?(Array)
197
+ # map[key] << value if map[key].is_a?(Array) and !value.is_a?(Array)
198
198
  # end
199
199
  # "
200
200
  # push_previous_map_as_event => true
@@ -313,12 +313,14 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
313
313
 
314
314
  # Defines tags to add when a timeout event is generated and yield
315
315
  config :timeout_tags, :validate => :array, :required => false, :default => []
316
-
317
316
 
317
+
318
+ # STATIC VARIABLES
319
+
320
+
318
321
  # Default timeout (in seconds) when not defined in plugin configuration
319
322
  DEFAULT_TIMEOUT = 1800
320
323
 
321
-
322
324
  # This is the state of the filter.
323
325
  # For each entry, key is "task_id" and value is a map freely updatable by 'code' config
324
326
  @@aggregate_maps = {}
@@ -329,20 +331,26 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
329
331
  # Default timeout for task_id patterns where timeout is not defined in logstash filter configuration
330
332
  @@default_timeout = nil
331
333
 
332
- # For each "task_id" pattern, defines which Aggregate instance will evict all expired Aggregate elements (older than timeout)
334
+ # For each "task_id" pattern, defines which Aggregate instance will process flush() call, processing expired Aggregate elements (older than timeout)
333
335
  # For each entry, key is "task_id pattern" and value is "aggregate instance"
334
- @@eviction_instance_map = {}
336
+ @@flush_instance_map = {}
335
337
 
336
- # last time where eviction was launched, per "task_id" pattern
337
- @@last_eviction_timestamp_map = {}
338
+ # last time where timeout management in flush() method was launched, per "task_id" pattern
339
+ @@last_flush_timestamp_map = {}
338
340
 
339
341
  # flag indicating if aggregate_maps_path option has been already set on one aggregate instance
340
342
  @@aggregate_maps_path_set = false
341
343
 
344
+ # defines which Aggregate instance will close Aggregate static variables
345
+ @@static_close_instance = nil
346
+
342
347
 
343
348
  # Initialize plugin
344
349
  public
345
350
  def register
351
+
352
+ @logger.debug("Aggregate register call", :code => @code)
353
+
346
354
  # process lambda expression to call in each filter call
347
355
  eval("@codeblock = lambda { |event, map| #{@code} }", binding, "(aggregate filter code)")
348
356
 
@@ -355,11 +363,11 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
355
363
 
356
364
  # timeout management : define eviction_instance for current task_id pattern
357
365
  if has_timeout_options?
358
- if @@eviction_instance_map.has_key?(@task_id)
366
+ if @@flush_instance_map.has_key?(@task_id)
359
367
  # all timeout options have to be defined in only one aggregate filter per task_id pattern
360
368
  raise LogStash::ConfigurationError, "Aggregate plugin: For task_id pattern #{@task_id}, there are more than one filter which defines timeout options. All timeout options have to be defined in only one aggregate filter per task_id pattern. Timeout options are : #{display_timeout_options}"
361
369
  end
362
- @@eviction_instance_map[@task_id] = self
370
+ @@flush_instance_map[@task_id] = self
363
371
  @logger.debug("Aggregate timeout for '#{@task_id}' pattern: #{@timeout} seconds")
364
372
  end
365
373
 
@@ -369,13 +377,19 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
369
377
  @logger.debug("Aggregate default timeout: #{@timeout} seconds")
370
378
  end
371
379
 
372
- # check if aggregate_maps_path option has already been set on another instance
380
+ # reinit static_close_instance (if necessary)
381
+ if !@@aggregate_maps_path_set && !@@static_close_instance.nil?
382
+ @@static_close_instance = nil
383
+ end
384
+
385
+ # check if aggregate_maps_path option has already been set on another instance else set @@aggregate_maps_path_set
373
386
  if !@aggregate_maps_path.nil?
374
387
  if @@aggregate_maps_path_set
375
388
  @@aggregate_maps_path_set = false
376
389
  raise LogStash::ConfigurationError, "Aggregate plugin: Option 'aggregate_maps_path' must be set on only one aggregate filter"
377
390
  else
378
391
  @@aggregate_maps_path_set = true
392
+ @@static_close_instance = self
379
393
  end
380
394
  end
381
395
 
@@ -394,21 +408,29 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
394
408
  # Called when logstash stops
395
409
  public
396
410
  def close
397
-
398
- # store aggregate maps to file (if option defined)
399
- @@mutex.synchronize do
400
- @@aggregate_maps.delete_if { |key, value| value.empty? }
401
- if !@aggregate_maps_path.nil? && !@@aggregate_maps.empty?
402
- File.open(@aggregate_maps_path, "w"){ |to_file| Marshal.dump(@@aggregate_maps, to_file) }
403
- @logger.info("Aggregate maps stored to : #{@aggregate_maps_path}")
411
+
412
+ @logger.debug("Aggregate close call", :code => @code)
413
+
414
+ # define static close instance if none is already defined
415
+ @@static_close_instance = self if @@static_close_instance.nil?
416
+
417
+ if @@static_close_instance == self
418
+ # store aggregate maps to file (if option defined)
419
+ @@mutex.synchronize do
420
+ @@aggregate_maps.delete_if { |key, value| value.empty? }
421
+ if !@aggregate_maps_path.nil? && !@@aggregate_maps.empty?
422
+ File.open(@aggregate_maps_path, "w"){ |to_file| Marshal.dump(@@aggregate_maps, to_file) }
423
+ @logger.info("Aggregate maps stored to : #{@aggregate_maps_path}")
424
+ end
425
+ @@aggregate_maps.clear()
404
426
  end
405
- @@aggregate_maps.clear()
427
+
428
+ # reinit static variables for logstash reload
429
+ @@default_timeout = nil
430
+ @@flush_instance_map = {}
431
+ @@last_flush_timestamp_map = {}
432
+ @@aggregate_maps_path_set = false
406
433
  end
407
-
408
- # Protection against logstash reload
409
- @@aggregate_maps_path_set = false if @@aggregate_maps_path_set
410
- @@default_timeout = nil unless @@default_timeout.nil?
411
- @@eviction_instance_map = {} unless @@eviction_instance_map.empty?
412
434
 
413
435
  end
414
436
 
@@ -435,8 +457,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
435
457
  return if @map_action == "update"
436
458
  # create new event from previous map, if @push_previous_map_as_event is enabled
437
459
  if @push_previous_map_as_event && !@@aggregate_maps[@task_id].empty?
438
- previous_map = @@aggregate_maps[@task_id].shift[1].map
439
- event_to_yield = create_timeout_event(previous_map, task_id)
460
+ event_to_yield = extract_previous_map_as_event()
440
461
  end
441
462
  aggregate_maps_element = LogStash::Filters::Aggregate::Element.new(Time.now);
442
463
  @@aggregate_maps[@task_id][task_id] = aggregate_maps_element
@@ -448,6 +469,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
448
469
  # execute the code to read/update map and event
449
470
  begin
450
471
  @codeblock.call(event, map)
472
+ @logger.debug("Aggregate successful filter code execution", :code => @code)
451
473
  noError = true
452
474
  rescue => exception
453
475
  @logger.error("Aggregate exception occurred",
@@ -477,6 +499,9 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
477
499
  # if @timeout_code is set, it will execute the timeout code on the created timeout event
478
500
  # returns the newly created event
479
501
  def create_timeout_event(aggregation_map, task_id)
502
+
503
+ @logger.debug("Aggregate create_timeout_event call with task_id '#{task_id}'")
504
+
480
505
  event_to_yield = LogStash::Event.new(aggregation_map)
481
506
 
482
507
  if @timeout_task_id_field
@@ -501,6 +526,14 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
501
526
  return event_to_yield
502
527
  end
503
528
 
529
+ # Extract the previous map in aggregate maps, and return it as a new Logstash event
530
+ def extract_previous_map_as_event
531
+ previous_entry = @@aggregate_maps[@task_id].shift()
532
+ previous_task_id = previous_entry[0]
533
+ previous_map = previous_entry[1].map
534
+ return create_timeout_event(previous_map, previous_task_id)
535
+ end
536
+
504
537
  # Necessary to indicate logstash to periodically call 'flush' method
505
538
  def periodic_flush
506
539
  true
@@ -508,22 +541,41 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
508
541
 
509
542
  # This method is invoked by LogStash every 5 seconds.
510
543
  def flush(options = {})
544
+
545
+ @logger.debug("Aggregate flush call with #{options}")
546
+
511
547
  # Protection against no timeout defined by logstash conf : define a default eviction instance with timeout = DEFAULT_TIMEOUT seconds
512
548
  if @@default_timeout.nil?
513
549
  @@default_timeout = DEFAULT_TIMEOUT
514
550
  end
515
- if !@@eviction_instance_map.has_key?(@task_id)
516
- @@eviction_instance_map[@task_id] = self
551
+ if !@@flush_instance_map.has_key?(@task_id)
552
+ @@flush_instance_map[@task_id] = self
517
553
  @timeout = @@default_timeout
518
- elsif @@eviction_instance_map[@task_id].timeout.nil?
519
- @@eviction_instance_map[@task_id].timeout = @@default_timeout
554
+ elsif @@flush_instance_map[@task_id].timeout.nil?
555
+ @@flush_instance_map[@task_id].timeout = @@default_timeout
520
556
  end
521
557
 
522
- # Launch eviction only every interval of (@timeout / 2) seconds
523
- if @@eviction_instance_map[@task_id] == self && (!@@last_eviction_timestamp_map.has_key?(@task_id) || Time.now > @@last_eviction_timestamp_map[@task_id] + @timeout / 2)
558
+ # Launch timeout management only every interval of (@timeout / 2) seconds or at Logstash shutdown
559
+ if @@flush_instance_map[@task_id] == self && (!@@last_flush_timestamp_map.has_key?(@task_id) || Time.now > @@last_flush_timestamp_map[@task_id] + @timeout / 2 || options[:final])
524
560
  events_to_flush = remove_expired_maps()
525
- @@last_eviction_timestamp_map[@task_id] = Time.now
561
+
562
+ # at Logstash shutdown, if push_previous_map_as_event is enabled, it's important to force flush (particularly for jdbc input plugin)
563
+ if options[:final] && @push_previous_map_as_event && !@@aggregate_maps[@task_id].empty?
564
+ events_to_flush << extract_previous_map_as_event()
565
+ end
566
+
567
+ # tag flushed events, indicating "final flush" special event
568
+ if options[:final]
569
+ events_to_flush.each { |event_to_flush| event_to_flush.tag("_aggregatefinalflush") }
570
+ end
571
+
572
+ # update last flush timestamp
573
+ @@last_flush_timestamp_map[@task_id] = Time.now
574
+
575
+ # return events to flush into Logstash pipeline
526
576
  return events_to_flush
577
+ else
578
+ return []
527
579
  end
528
580
 
529
581
  end
@@ -537,6 +589,8 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
537
589
 
538
590
  @@mutex.synchronize do
539
591
 
592
+ @logger.debug("Aggregate remove_expired_maps call with '#{@task_id}' pattern and #{@@aggregate_maps[@task_id].length} maps")
593
+
540
594
  @@aggregate_maps[@task_id].delete_if do |key, element|
541
595
  if element.creation_timestamp < min_timestamp
542
596
  if @push_previous_map_as_event || @push_map_as_event_on_timeout
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-filter-aggregate'
3
- s.version = '2.5.0'
3
+ s.version = '2.5.1'
4
4
  s.licenses = ['Apache License (2.0)']
5
5
  s.summary = 'The aim of this filter is to aggregate information available among several events (typically log lines) belonging to a same task, and finally push aggregated information into final task event.'
6
6
  s.description = 'This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program'
@@ -6,8 +6,7 @@ require_relative "aggregate_spec_helper"
6
6
  describe LogStash::Filters::Aggregate do
7
7
 
8
8
  before(:each) do
9
- reset_timeout_management()
10
- aggregate_maps.clear()
9
+ reset_static_variables()
11
10
  @start_filter = setup_filter({ "map_action" => "create", "code" => "map['sql_duration'] = 0" })
12
11
  @update_filter = setup_filter({ "map_action" => "update", "code" => "map['sql_duration'] += event.get('duration')" })
13
12
  @end_filter = setup_filter({"timeout_task_id_field" => "my_id", "push_map_as_event_on_timeout" => true, "map_action" => "update", "code" => "event.set('sql_duration', map['sql_duration'])", "end_of_task" => true, "timeout" => 5, "timeout_code" => "event.set('test', 'testValue')", "timeout_tags" => ["tag1", "tag2"] })
@@ -212,6 +211,9 @@ describe LogStash::Filters::Aggregate do
212
211
  filter = store_filter.filter(start_event)
213
212
  expect(aggregate_maps["%{taskid}"].size).to eq(1)
214
213
 
214
+ @end_filter.close()
215
+ expect(aggregate_maps).not_to be_empty
216
+
215
217
  store_filter.close()
216
218
  expect(File.exist?(store_file)).to be true
217
219
  expect(aggregate_maps).to be_empty
@@ -232,6 +234,21 @@ describe LogStash::Filters::Aggregate do
232
234
  end
233
235
  end
234
236
 
237
+ context "Logstash reload occurs, " do
238
+ describe "close method is called, " do
239
+ it "reinitializes static variables" do
240
+ @end_filter.close()
241
+ expect(aggregate_maps).to be_empty
242
+ expect(taskid_eviction_instance).to be_nil
243
+ expect(static_close_instance).not_to be_nil
244
+ expect(aggregate_maps_path_set).to be false
245
+
246
+ @end_filter.register()
247
+ expect(static_close_instance).to be_nil
248
+ end
249
+ end
250
+ end
251
+
235
252
  context "push_previous_map_as_event option is defined, " do
236
253
  describe "when push_previous_map_as_event option is activated on another filter with same task_id pattern" do
237
254
  it "should throw a LogStash::ConfigurationError" do
@@ -243,9 +260,9 @@ describe LogStash::Filters::Aggregate do
243
260
 
244
261
  describe "when a new task id is detected, " do
245
262
  it "should push previous map as new event" do
246
- push_filter = setup_filter({ "task_id" => "%{ppm_id}", "code" => "map['ppm_id'] = event.get('ppm_id')", "push_previous_map_as_event" => true, "timeout" => 5 })
263
+ push_filter = setup_filter({ "task_id" => "%{ppm_id}", "code" => "map['ppm_id'] = event.get('ppm_id')", "push_previous_map_as_event" => true, "timeout" => 5, "timeout_task_id_field" => "timeout_task_id_field" })
247
264
  push_filter.filter(event({"ppm_id" => "1"})) { |yield_event| fail "task 1 shouldn't have yield event" }
248
- push_filter.filter(event({"ppm_id" => "2"})) { |yield_event| expect(yield_event.get("ppm_id")).to eq("1") }
265
+ push_filter.filter(event({"ppm_id" => "2"})) { |yield_event| expect(yield_event.get("ppm_id")).to eq("1") ; expect(yield_event.get("timeout_task_id_field")).to eq("1") }
249
266
  expect(aggregate_maps["%{ppm_id}"].size).to eq(1)
250
267
  end
251
268
  end
@@ -263,6 +280,21 @@ describe LogStash::Filters::Aggregate do
263
280
  expect(aggregate_maps["%{ppm_id}"].size).to eq(0)
264
281
  end
265
282
  end
283
+
284
+ describe "when Logstash shutdown happens, " do
285
+ it "flush method should return last map as new event even if timeout has not occurred" do
286
+ push_filter = setup_filter({ "task_id" => "%{ppm_id}", "code" => "", "push_previous_map_as_event" => true, "timeout" => 4 })
287
+ push_filter.filter(event({"ppm_id" => "1"}))
288
+ events_to_flush = push_filter.flush({:final=>false})
289
+ expect(events_to_flush).to be_empty
290
+ expect(aggregate_maps["%{ppm_id}"].size).to eq(1)
291
+ events_to_flush = push_filter.flush({:final=>true})
292
+ expect(events_to_flush).not_to be_nil
293
+ expect(events_to_flush.size).to eq(1)
294
+ expect(events_to_flush[0].get("tags")).to eq(["_aggregatefinalflush"])
295
+ expect(aggregate_maps["%{ppm_id}"].size).to eq(0)
296
+ end
297
+ end
266
298
  end
267
299
 
268
300
 
@@ -38,10 +38,26 @@ def aggregate_maps()
38
38
  end
39
39
 
40
40
  def taskid_eviction_instance()
41
- LogStash::Filters::Aggregate.class_variable_get(:@@eviction_instance_map)["%{taskid}"]
41
+ LogStash::Filters::Aggregate.class_variable_get(:@@flush_instance_map)["%{taskid}"]
42
+ end
43
+
44
+ def static_close_instance()
45
+ LogStash::Filters::Aggregate.class_variable_get(:@@static_close_instance)
46
+ end
47
+
48
+ def aggregate_maps_path_set()
49
+ LogStash::Filters::Aggregate.class_variable_get(:@@aggregate_maps_path_set)
42
50
  end
43
51
 
44
52
  def reset_timeout_management()
45
53
  LogStash::Filters::Aggregate.class_variable_set(:@@default_timeout, nil)
46
- LogStash::Filters::Aggregate.class_variable_get(:@@eviction_instance_map).clear()
54
+ LogStash::Filters::Aggregate.class_variable_get(:@@flush_instance_map).clear()
55
+ LogStash::Filters::Aggregate.class_variable_get(:@@last_flush_timestamp_map).clear()
56
+ end
57
+
58
+ def reset_static_variables()
59
+ reset_timeout_management()
60
+ aggregate_maps().clear()
61
+ LogStash::Filters::Aggregate.class_variable_set(:@@static_close_instance, nil)
62
+ LogStash::Filters::Aggregate.class_variable_set(:@@aggregate_maps_path_set, false)
47
63
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-aggregate
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.0
4
+ version: 2.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-10-30 00:00:00.000000000 Z
12
+ date: 2017-01-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement