logstash-filter-aggregate 2.5.0 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b8769a07ab85fdc52359b1e08838f8f308f96e61
4
- data.tar.gz: 0e2c0b0c078b789c87c57678683ac2ce72919fa5
3
+ metadata.gz: a9d474bc096fd4b164adb655607abdc0d52b7f15
4
+ data.tar.gz: 9e7b57528e3201bea76ed198e080be776b0ed3af
5
5
  SHA512:
6
- metadata.gz: 7baa1843fac215316d44e8d029aa840b9fe295f7bb953e2bb51873665da473d70f54551b23960491256a542687732d20e482a3efd98fbd8124012c4cea5c3e84
7
- data.tar.gz: b073f65ebad9629bdda9893f7e08fe630c8f48643bb4575001ed2430e46fbf6f13a218b09e2df9585788d519a5acf2fcb5b36c0e7e67ed8c45245c61dbdab523
6
+ metadata.gz: 6d08a02cdf7a32904e74f235f2f3888170cd535dce21aeb5767c6ddfa302920beb2a6c4216aac1b485ccb502a0f0180d687b5276e7e18ec691e4c8ae2c5895e2
7
+ data.tar.gz: 5d509ab8cf7d26fbce5cf4f53d8255ff7c203492c9fe96aab363afa73dedcda278e2497a7be91e7f8f7ce58f08bb42cfd93f08afd94628c4d54b6cd671dfeee9
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 2.5.1
2
+ - enhancement: when final flush occurs (just before Logstash shutdown), add `_aggregatefinalflush` tag on generated timeout events
3
+ - bugfix: when final flush occurs (just before Logstash shutdown), push last aggregate map as event (if push_previous_map_as_event=true)
4
+ - bugfix: fix 'timeout_task_id_field' feature when push_previous_map_as_event=true
5
+ - bugfix: fix aggregate_maps_path feature (bug since v2.4.0)
6
+ - internal: add debug logging
7
+ - internal: refactor flush management static variables
8
+
1
9
  ## 2.5.0
2
10
  - new feature: add compatibility with Logstash 5
3
11
  - breaking: need Logstash 2.4 or later
data/README.md CHANGED
@@ -179,7 +179,7 @@ In that case, you don't want to wait task timeout to flush aggregation map.
179
179
  map['town_name'] ||= []
180
180
  event.to_hash.each do |key,value|
181
181
  map[key] = value unless map.has_key?(key)
182
- map[key] << value if map[key].is_a?(Array)
182
+ map[key] << value if map[key].is_a?(Array) and !value.is_a?(Array)
183
183
  end
184
184
  "
185
185
  push_previous_map_as_event => true
@@ -194,7 +194,7 @@ require "logstash/util/decorators"
194
194
  # map['town_name'] ||= []
195
195
  # event.to_hash.each do |key,value|
196
196
  # map[key] = value unless map.has_key?(key)
197
- # map[key] << value if map[key].is_a?(Array)
197
+ # map[key] << value if map[key].is_a?(Array) and !value.is_a?(Array)
198
198
  # end
199
199
  # "
200
200
  # push_previous_map_as_event => true
@@ -313,12 +313,14 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
313
313
 
314
314
  # Defines tags to add when a timeout event is generated and yielded
315
315
  config :timeout_tags, :validate => :array, :required => false, :default => []
316
-
317
316
 
317
+
318
+ # STATIC VARIABLES
319
+
320
+
318
321
  # Default timeout (in seconds) when not defined in plugin configuration
319
322
  DEFAULT_TIMEOUT = 1800
320
323
 
321
-
322
324
  # This is the state of the filter.
323
325
  # For each entry, key is "task_id" and value is a map freely updatable by 'code' config
324
326
  @@aggregate_maps = {}
@@ -329,20 +331,26 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
329
331
  # Default timeout for task_id patterns where timeout is not defined in logstash filter configuration
330
332
  @@default_timeout = nil
331
333
 
332
- # For each "task_id" pattern, defines which Aggregate instance will evict all expired Aggregate elements (older than timeout)
334
+ # For each "task_id" pattern, defines which Aggregate instance will process flush() call, processing expired Aggregate elements (older than timeout)
333
335
  # For each entry, key is "task_id pattern" and value is "aggregate instance"
334
- @@eviction_instance_map = {}
336
+ @@flush_instance_map = {}
335
337
 
336
- # last time where eviction was launched, per "task_id" pattern
337
- @@last_eviction_timestamp_map = {}
338
+ # last time when timeout management in flush() method was launched, per "task_id" pattern
339
+ @@last_flush_timestamp_map = {}
338
340
 
339
341
  # flag indicating if aggregate_maps_path option has been already set on one aggregate instance
340
342
  @@aggregate_maps_path_set = false
341
343
 
344
+ # defines which Aggregate instance will close Aggregate static variables
345
+ @@static_close_instance = nil
346
+
342
347
 
343
348
  # Initialize plugin
344
349
  public
345
350
  def register
351
+
352
+ @logger.debug("Aggregate register call", :code => @code)
353
+
346
354
  # process lambda expression to call in each filter call
347
355
  eval("@codeblock = lambda { |event, map| #{@code} }", binding, "(aggregate filter code)")
348
356
 
@@ -355,11 +363,11 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
355
363
 
356
364
  # timeout management : define flush instance for current task_id pattern
357
365
  if has_timeout_options?
358
- if @@eviction_instance_map.has_key?(@task_id)
366
+ if @@flush_instance_map.has_key?(@task_id)
359
367
  # all timeout options have to be defined in only one aggregate filter per task_id pattern
360
368
  raise LogStash::ConfigurationError, "Aggregate plugin: For task_id pattern #{@task_id}, there are more than one filter which defines timeout options. All timeout options have to be defined in only one aggregate filter per task_id pattern. Timeout options are : #{display_timeout_options}"
361
369
  end
362
- @@eviction_instance_map[@task_id] = self
370
+ @@flush_instance_map[@task_id] = self
363
371
  @logger.debug("Aggregate timeout for '#{@task_id}' pattern: #{@timeout} seconds")
364
372
  end
365
373
 
@@ -369,13 +377,19 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
369
377
  @logger.debug("Aggregate default timeout: #{@timeout} seconds")
370
378
  end
371
379
 
372
- # check if aggregate_maps_path option has already been set on another instance
380
+ # reinit static_close_instance (if necessary)
381
+ if !@@aggregate_maps_path_set && !@@static_close_instance.nil?
382
+ @@static_close_instance = nil
383
+ end
384
+
385
+ # check if aggregate_maps_path option has already been set on another instance else set @@aggregate_maps_path_set
373
386
  if !@aggregate_maps_path.nil?
374
387
  if @@aggregate_maps_path_set
375
388
  @@aggregate_maps_path_set = false
376
389
  raise LogStash::ConfigurationError, "Aggregate plugin: Option 'aggregate_maps_path' must be set on only one aggregate filter"
377
390
  else
378
391
  @@aggregate_maps_path_set = true
392
+ @@static_close_instance = self
379
393
  end
380
394
  end
381
395
 
@@ -394,21 +408,29 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
394
408
  # Called when logstash stops
395
409
  public
396
410
  def close
397
-
398
- # store aggregate maps to file (if option defined)
399
- @@mutex.synchronize do
400
- @@aggregate_maps.delete_if { |key, value| value.empty? }
401
- if !@aggregate_maps_path.nil? && !@@aggregate_maps.empty?
402
- File.open(@aggregate_maps_path, "w"){ |to_file| Marshal.dump(@@aggregate_maps, to_file) }
403
- @logger.info("Aggregate maps stored to : #{@aggregate_maps_path}")
411
+
412
+ @logger.debug("Aggregate close call", :code => @code)
413
+
414
+ # define static close instance if none is already defined
415
+ @@static_close_instance = self if @@static_close_instance.nil?
416
+
417
+ if @@static_close_instance == self
418
+ # store aggregate maps to file (if option defined)
419
+ @@mutex.synchronize do
420
+ @@aggregate_maps.delete_if { |key, value| value.empty? }
421
+ if !@aggregate_maps_path.nil? && !@@aggregate_maps.empty?
422
+ File.open(@aggregate_maps_path, "w"){ |to_file| Marshal.dump(@@aggregate_maps, to_file) }
423
+ @logger.info("Aggregate maps stored to : #{@aggregate_maps_path}")
424
+ end
425
+ @@aggregate_maps.clear()
404
426
  end
405
- @@aggregate_maps.clear()
427
+
428
+ # reinit static variables for logstash reload
429
+ @@default_timeout = nil
430
+ @@flush_instance_map = {}
431
+ @@last_flush_timestamp_map = {}
432
+ @@aggregate_maps_path_set = false
406
433
  end
407
-
408
- # Protection against logstash reload
409
- @@aggregate_maps_path_set = false if @@aggregate_maps_path_set
410
- @@default_timeout = nil unless @@default_timeout.nil?
411
- @@eviction_instance_map = {} unless @@eviction_instance_map.empty?
412
434
 
413
435
  end
414
436
 
@@ -435,8 +457,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
435
457
  return if @map_action == "update"
436
458
  # create new event from previous map, if @push_previous_map_as_event is enabled
437
459
  if @push_previous_map_as_event && !@@aggregate_maps[@task_id].empty?
438
- previous_map = @@aggregate_maps[@task_id].shift[1].map
439
- event_to_yield = create_timeout_event(previous_map, task_id)
460
+ event_to_yield = extract_previous_map_as_event()
440
461
  end
441
462
  aggregate_maps_element = LogStash::Filters::Aggregate::Element.new(Time.now);
442
463
  @@aggregate_maps[@task_id][task_id] = aggregate_maps_element
@@ -448,6 +469,7 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
448
469
  # execute the code to read/update map and event
449
470
  begin
450
471
  @codeblock.call(event, map)
472
+ @logger.debug("Aggregate successful filter code execution", :code => @code)
451
473
  noError = true
452
474
  rescue => exception
453
475
  @logger.error("Aggregate exception occurred",
@@ -477,6 +499,9 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
477
499
  # if @timeout_code is set, it will execute the timeout code on the created timeout event
478
500
  # returns the newly created event
479
501
  def create_timeout_event(aggregation_map, task_id)
502
+
503
+ @logger.debug("Aggregate create_timeout_event call with task_id '#{task_id}'")
504
+
480
505
  event_to_yield = LogStash::Event.new(aggregation_map)
481
506
 
482
507
  if @timeout_task_id_field
@@ -501,6 +526,14 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
501
526
  return event_to_yield
502
527
  end
503
528
 
529
+ # Extract the previous map in aggregate maps, and return it as a new Logstash event
530
+ def extract_previous_map_as_event
531
+ previous_entry = @@aggregate_maps[@task_id].shift()
532
+ previous_task_id = previous_entry[0]
533
+ previous_map = previous_entry[1].map
534
+ return create_timeout_event(previous_map, previous_task_id)
535
+ end
536
+
504
537
  # Necessary to indicate logstash to periodically call 'flush' method
505
538
  def periodic_flush
506
539
  true
@@ -508,22 +541,41 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
508
541
 
509
542
  # This method is invoked by LogStash every 5 seconds.
510
543
  def flush(options = {})
544
+
545
+ @logger.debug("Aggregate flush call with #{options}")
546
+
511
547
  # Protection against no timeout defined by logstash conf : define a default flush instance with timeout = DEFAULT_TIMEOUT seconds
512
548
  if @@default_timeout.nil?
513
549
  @@default_timeout = DEFAULT_TIMEOUT
514
550
  end
515
- if !@@eviction_instance_map.has_key?(@task_id)
516
- @@eviction_instance_map[@task_id] = self
551
+ if !@@flush_instance_map.has_key?(@task_id)
552
+ @@flush_instance_map[@task_id] = self
517
553
  @timeout = @@default_timeout
518
- elsif @@eviction_instance_map[@task_id].timeout.nil?
519
- @@eviction_instance_map[@task_id].timeout = @@default_timeout
554
+ elsif @@flush_instance_map[@task_id].timeout.nil?
555
+ @@flush_instance_map[@task_id].timeout = @@default_timeout
520
556
  end
521
557
 
522
- # Launch eviction only every interval of (@timeout / 2) seconds
523
- if @@eviction_instance_map[@task_id] == self && (!@@last_eviction_timestamp_map.has_key?(@task_id) || Time.now > @@last_eviction_timestamp_map[@task_id] + @timeout / 2)
558
+ # Launch timeout management only every interval of (@timeout / 2) seconds or at Logstash shutdown
559
+ if @@flush_instance_map[@task_id] == self && (!@@last_flush_timestamp_map.has_key?(@task_id) || Time.now > @@last_flush_timestamp_map[@task_id] + @timeout / 2 || options[:final])
524
560
  events_to_flush = remove_expired_maps()
525
- @@last_eviction_timestamp_map[@task_id] = Time.now
561
+
562
+ # at Logstash shutdown, if push_previous_map_as_event is enabled, it's important to force flush (particularly for jdbc input plugin)
563
+ if options[:final] && @push_previous_map_as_event && !@@aggregate_maps[@task_id].empty?
564
+ events_to_flush << extract_previous_map_as_event()
565
+ end
566
+
567
+ # tag flushed events, indicating "final flush" special event
568
+ if options[:final]
569
+ events_to_flush.each { |event_to_flush| event_to_flush.tag("_aggregatefinalflush") }
570
+ end
571
+
572
+ # update last flush timestamp
573
+ @@last_flush_timestamp_map[@task_id] = Time.now
574
+
575
+ # return events to flush into Logstash pipeline
526
576
  return events_to_flush
577
+ else
578
+ return []
527
579
  end
528
580
 
529
581
  end
@@ -537,6 +589,8 @@ class LogStash::Filters::Aggregate < LogStash::Filters::Base
537
589
 
538
590
  @@mutex.synchronize do
539
591
 
592
+ @logger.debug("Aggregate remove_expired_maps call with '#{@task_id}' pattern and #{@@aggregate_maps[@task_id].length} maps")
593
+
540
594
  @@aggregate_maps[@task_id].delete_if do |key, element|
541
595
  if element.creation_timestamp < min_timestamp
542
596
  if @push_previous_map_as_event || @push_map_as_event_on_timeout
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-filter-aggregate'
3
- s.version = '2.5.0'
3
+ s.version = '2.5.1'
4
4
  s.licenses = ['Apache License (2.0)']
5
5
  s.summary = 'The aim of this filter is to aggregate information available among several events (typically log lines) belonging to a same task, and finally push aggregated information into final task event.'
6
6
  s.description = 'This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program'
@@ -6,8 +6,7 @@ require_relative "aggregate_spec_helper"
6
6
  describe LogStash::Filters::Aggregate do
7
7
 
8
8
  before(:each) do
9
- reset_timeout_management()
10
- aggregate_maps.clear()
9
+ reset_static_variables()
11
10
  @start_filter = setup_filter({ "map_action" => "create", "code" => "map['sql_duration'] = 0" })
12
11
  @update_filter = setup_filter({ "map_action" => "update", "code" => "map['sql_duration'] += event.get('duration')" })
13
12
  @end_filter = setup_filter({"timeout_task_id_field" => "my_id", "push_map_as_event_on_timeout" => true, "map_action" => "update", "code" => "event.set('sql_duration', map['sql_duration'])", "end_of_task" => true, "timeout" => 5, "timeout_code" => "event.set('test', 'testValue')", "timeout_tags" => ["tag1", "tag2"] })
@@ -212,6 +211,9 @@ describe LogStash::Filters::Aggregate do
212
211
  filter = store_filter.filter(start_event)
213
212
  expect(aggregate_maps["%{taskid}"].size).to eq(1)
214
213
 
214
+ @end_filter.close()
215
+ expect(aggregate_maps).not_to be_empty
216
+
215
217
  store_filter.close()
216
218
  expect(File.exist?(store_file)).to be true
217
219
  expect(aggregate_maps).to be_empty
@@ -232,6 +234,21 @@ describe LogStash::Filters::Aggregate do
232
234
  end
233
235
  end
234
236
 
237
+ context "Logstash reload occurs, " do
238
+ describe "close method is called, " do
239
+ it "reinitializes static variables" do
240
+ @end_filter.close()
241
+ expect(aggregate_maps).to be_empty
242
+ expect(taskid_eviction_instance).to be_nil
243
+ expect(static_close_instance).not_to be_nil
244
+ expect(aggregate_maps_path_set).to be false
245
+
246
+ @end_filter.register()
247
+ expect(static_close_instance).to be_nil
248
+ end
249
+ end
250
+ end
251
+
235
252
  context "push_previous_map_as_event option is defined, " do
236
253
  describe "when push_previous_map_as_event option is activated on another filter with same task_id pattern" do
237
254
  it "should throw a LogStash::ConfigurationError" do
@@ -243,9 +260,9 @@ describe LogStash::Filters::Aggregate do
243
260
 
244
261
  describe "when a new task id is detected, " do
245
262
  it "should push previous map as new event" do
246
- push_filter = setup_filter({ "task_id" => "%{ppm_id}", "code" => "map['ppm_id'] = event.get('ppm_id')", "push_previous_map_as_event" => true, "timeout" => 5 })
263
+ push_filter = setup_filter({ "task_id" => "%{ppm_id}", "code" => "map['ppm_id'] = event.get('ppm_id')", "push_previous_map_as_event" => true, "timeout" => 5, "timeout_task_id_field" => "timeout_task_id_field" })
247
264
  push_filter.filter(event({"ppm_id" => "1"})) { |yield_event| fail "task 1 shouldn't have yield event" }
248
- push_filter.filter(event({"ppm_id" => "2"})) { |yield_event| expect(yield_event.get("ppm_id")).to eq("1") }
265
+ push_filter.filter(event({"ppm_id" => "2"})) { |yield_event| expect(yield_event.get("ppm_id")).to eq("1") ; expect(yield_event.get("timeout_task_id_field")).to eq("1") }
249
266
  expect(aggregate_maps["%{ppm_id}"].size).to eq(1)
250
267
  end
251
268
  end
@@ -263,6 +280,21 @@ describe LogStash::Filters::Aggregate do
263
280
  expect(aggregate_maps["%{ppm_id}"].size).to eq(0)
264
281
  end
265
282
  end
283
+
284
+ describe "when Logstash shutdown happens, " do
285
+ it "flush method should return last map as new event even if timeout has not occured" do
286
+ push_filter = setup_filter({ "task_id" => "%{ppm_id}", "code" => "", "push_previous_map_as_event" => true, "timeout" => 4 })
287
+ push_filter.filter(event({"ppm_id" => "1"}))
288
+ events_to_flush = push_filter.flush({:final=>false})
289
+ expect(events_to_flush).to be_empty
290
+ expect(aggregate_maps["%{ppm_id}"].size).to eq(1)
291
+ events_to_flush = push_filter.flush({:final=>true})
292
+ expect(events_to_flush).not_to be_nil
293
+ expect(events_to_flush.size).to eq(1)
294
+ expect(events_to_flush[0].get("tags")).to eq(["_aggregatefinalflush"])
295
+ expect(aggregate_maps["%{ppm_id}"].size).to eq(0)
296
+ end
297
+ end
266
298
  end
267
299
 
268
300
 
@@ -38,10 +38,26 @@ def aggregate_maps()
38
38
  end
39
39
 
40
40
  def taskid_eviction_instance()
41
- LogStash::Filters::Aggregate.class_variable_get(:@@eviction_instance_map)["%{taskid}"]
41
+ LogStash::Filters::Aggregate.class_variable_get(:@@flush_instance_map)["%{taskid}"]
42
+ end
43
+
44
+ def static_close_instance()
45
+ LogStash::Filters::Aggregate.class_variable_get(:@@static_close_instance)
46
+ end
47
+
48
+ def aggregate_maps_path_set()
49
+ LogStash::Filters::Aggregate.class_variable_get(:@@aggregate_maps_path_set)
42
50
  end
43
51
 
44
52
  def reset_timeout_management()
45
53
  LogStash::Filters::Aggregate.class_variable_set(:@@default_timeout, nil)
46
- LogStash::Filters::Aggregate.class_variable_get(:@@eviction_instance_map).clear()
54
+ LogStash::Filters::Aggregate.class_variable_get(:@@flush_instance_map).clear()
55
+ LogStash::Filters::Aggregate.class_variable_get(:@@last_flush_timestamp_map).clear()
56
+ end
57
+
58
+ def reset_static_variables()
59
+ reset_timeout_management()
60
+ aggregate_maps().clear()
61
+ LogStash::Filters::Aggregate.class_variable_set(:@@static_close_instance, nil)
62
+ LogStash::Filters::Aggregate.class_variable_set(:@@aggregate_maps_path_set, false)
47
63
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-aggregate
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.0
4
+ version: 2.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-10-30 00:00:00.000000000 Z
12
+ date: 2017-01-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement