logstash-filter-aggregate 2.5.1 → 2.5.2
- checksums.yaml +4 -4
- data/BUILD.md +81 -81
- data/CHANGELOG.md +71 -65
- data/CONTRIBUTORS +14 -14
- data/Gemfile +2 -2
- data/LICENSE +13 -13
- data/NOTICE.txt +4 -4
- data/README.md +327 -296
- data/lib/logstash/filters/aggregate.rb +685 -642
- data/logstash-filter-aggregate.gemspec +26 -26
- data/spec/filters/aggregate_spec.rb +315 -301
- data/spec/filters/aggregate_spec_helper.rb +63 -63
- metadata +2 -2
data/lib/logstash/filters/aggregate.rb (2.5.2)
@@ -1,643 +1,686 @@
# encoding: utf-8

require "logstash/filters/base"
require "logstash/namespace"
require "thread"
require "logstash/util/decorators"

#
# The aim of this filter is to aggregate information available among several events (typically log lines) belonging to the same task,
# and finally push aggregated information into the final task event.
#
# You should be very careful to set Logstash filter workers to 1 (`-w 1` flag) for this filter to work correctly,
# otherwise events may be processed out of sequence and unexpected results will occur.
#
# ==== Example #1
#
# * with these given logs:
# [source,ruby]
# ----------------------------------
#  INFO - 12345 - TASK_START - start
#  INFO - 12345 - SQL - sqlQuery1 - 12
#  INFO - 12345 - SQL - sqlQuery2 - 34
#  INFO - 12345 - TASK_END - end
# ----------------------------------
#
# * you can aggregate "sql duration" for the whole task with this configuration:
# [source,ruby]
# ----------------------------------
# filter {
#   grok {
#     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
#   }
#
#   if [logger] == "TASK_START" {
#     aggregate {
#       task_id => "%{taskid}"
#       code => "map['sql_duration'] = 0"
#       map_action => "create"
#     }
#   }
#
#   if [logger] == "SQL" {
#     aggregate {
#       task_id => "%{taskid}"
#       code => "map['sql_duration'] += event.get('duration')"
#       map_action => "update"
#     }
#   }
#
#   if [logger] == "TASK_END" {
#     aggregate {
#       task_id => "%{taskid}"
#       code => "event.set('sql_duration', map['sql_duration'])"
#       map_action => "update"
#       end_of_task => true
#       timeout => 120
#     }
#   }
# }
# ----------------------------------
#
# * the final event then looks like:
# [source,ruby]
# ----------------------------------
# {
#   "message" => "INFO - 12345 - TASK_END - end message",
#   "sql_duration" => 46
# }
# ----------------------------------
#
# the field `sql_duration` is added and contains the sum of all SQL query durations.
#
# ==== Example #2 : no start event
#
# * If you have the same logs as in example #1, but without a start log:
# [source,ruby]
# ----------------------------------
#  INFO - 12345 - SQL - sqlQuery1 - 12
#  INFO - 12345 - SQL - sqlQuery2 - 34
#  INFO - 12345 - TASK_END - end
# ----------------------------------
#
# * you can also aggregate "sql duration" with a slightly different configuration:
# [source,ruby]
# ----------------------------------
# filter {
#   grok {
#     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
#   }
#
#   if [logger] == "SQL" {
#     aggregate {
#       task_id => "%{taskid}"
#       code => "map['sql_duration'] ||= 0 ; map['sql_duration'] += event.get('duration')"
#     }
#   }
#
#   if [logger] == "TASK_END" {
#     aggregate {
#       task_id => "%{taskid}"
#       code => "event.set('sql_duration', map['sql_duration'])"
#       end_of_task => true
#       timeout => 120
#     }
#   }
# }
# ----------------------------------
#
# * the final event is exactly the same as in example #1
# * the key point is the "||=" ruby operator. It initializes the 'sql_duration' map entry to 0 only if this map entry is not already initialized
#
#
# ==== Example #3 : no end event
#
# Third use case: you have no specific end event.
#
# A typical case is aggregating or tracking user behaviour. We can track a user by its ID through the events; however, once the user stops interacting, the events stop coming in. There is no specific event indicating the end of the user's interaction.
#
# In this case, we can enable the option 'push_map_as_event_on_timeout' to push the aggregation map as a new event when a timeout occurs.
# In addition, we can set 'timeout_code' to execute code on the populated timeout event.
# We can also add 'timeout_task_id_field' so we can correlate the task_id, which in this case would be the user's ID.
#
# * Given these logs:
#
# [source,ruby]
# ----------------------------------
# INFO - 12345 - Clicked One
# INFO - 12345 - Clicked Two
# INFO - 12345 - Clicked Three
# ----------------------------------
#
# * You can aggregate the number of clicks the user made like this:
#
# [source,ruby]
# ----------------------------------
# filter {
#   grok {
#     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:user_id} - %{GREEDYDATA:msg_text}" ]
#   }
#
#   aggregate {
#     task_id => "%{user_id}"
#     code => "map['clicks'] ||= 0; map['clicks'] += 1;"
#     push_map_as_event_on_timeout => true
#     timeout_task_id_field => "user_id"
#     timeout => 600 # 10 minutes timeout
#     timeout_tags => ['_aggregatetimeout']
#     timeout_code => "event.set('several_clicks', event.get('clicks') > 1)"
#   }
# }
# ----------------------------------
#
# * After ten minutes, this will yield an event like:
#
# [source,json]
# ----------------------------------
# {
#   "user_id": "12345",
#   "clicks": 3,
#   "several_clicks": true,
#   "tags": [
#     "_aggregatetimeout"
#   ]
# }
# ----------------------------------
#
# ==== Example #4 : no end event and tasks come one after the other
#
# Fourth use case: like example #3, you have no specific end event, but in addition, tasks come one after the other.
# That is to say: tasks are not interleaved. All task1 events come, then all task2 events come, ...
# In that case, you don't want to wait for the task timeout to flush the aggregation map.
# * A typical case is aggregating results from the jdbc input plugin.
# * Given that you have this SQL query: `SELECT country_name, town_name FROM town`
# * Using the jdbc input plugin, you get these 3 events:
# [source,json]
# ----------------------------------
#   { "country_name": "France", "town_name": "Paris" }
#   { "country_name": "France", "town_name": "Marseille" }
#   { "country_name": "USA", "town_name": "New-York" }
# ----------------------------------
# * And you would like to push these 2 result events into elasticsearch:
# [source,json]
# ----------------------------------
#   { "country_name": "France", "towns": [ {"town_name": "Paris"}, {"town_name": "Marseille"} ] }
#   { "country_name": "USA", "towns": [ {"town_name": "New-York"} ] }
# ----------------------------------
# * You can do that using the `push_previous_map_as_event` aggregate plugin option:
# [source,ruby]
# ----------------------------------
# filter {
#   aggregate {
#     task_id => "%{country_name}"
#     code => "
#       map['country_name'] = event.get('country_name')
#       map['towns'] ||= []
#       map['towns'] << {'town_name' => event.get('town_name')}
#       event.cancel()
#     "
#     push_previous_map_as_event => true
#     timeout => 3
#   }
# }
# ----------------------------------
# * The key point is that each time the aggregate plugin detects a new `country_name`, it pushes the previous aggregate map as a new Logstash event, and then creates a new empty map for the next country
# * When the 3s timeout expires, the last aggregate map is pushed as a new event
# * Finally, initial events (which are not aggregated) are dropped because they are no longer useful (thanks to `event.cancel()`)
#
#
# ==== How it works
# * the filter needs a "task_id" to correlate events (log lines) of the same task
# * at the task beginning, the filter creates a map, attached to the task_id
# * for each event, you can execute code using 'event' and 'map' (for instance, copy an event field to the map)
# * in the final event, you can execute a final piece of code (for instance, add map data to the final event)
# * after the final event, the map attached to the task is deleted (thanks to `end_of_task => true`)
# * an aggregate map is tied to one task_id value, which is tied to one task_id pattern. So if you have 2 filters with different task_id patterns, even if you have the same task_id value, they won't share the same aggregate map.
# * in one filter configuration, it is recommended to define a timeout option to protect the feature against unterminated tasks. It tells the filter to delete expired maps
# * if no timeout is defined, by default, all maps older than 1800 seconds are automatically deleted
# * all timeout options have to be defined in only one aggregate filter per task_id pattern. Timeout options are: timeout, timeout_code, push_map_as_event_on_timeout, push_previous_map_as_event, timeout_task_id_field, timeout_tags
# * if `code` execution raises an exception, the error is logged and the event is tagged '_aggregateexception'
#
#
# ==== Use Cases
# * extract some cool metrics from task logs and push them into the final task log event (like in examples #1 and #2)
# * extract error information from any task log line, and push it into the final task event (to get a final event with all error information, if any)
# * extract all back-end calls as a list, and push this list into the final task event (to get a task profile)
# * extract all http headers logged across several lines, and push the list into the final task event (complete http request info)
# * for every back-end call, collect call details available on several lines, analyse them and finally tag the final back-end call log line (error, timeout, business-warning, ...)
# * Finally, the task id can be any correlation id matching your need: it can be a session id, a file path, ...
#
#
class LogStash::Filters::Aggregate < LogStash::Filters::Base


  # ############## #
  # CONFIG OPTIONS #
  # ############## #


  config_name "aggregate"

  # The expression defining the task ID to correlate logs.
  #
  # This value must uniquely identify the task.
  #
  # Example:
  # [source,ruby]
  #     filter {
  #       aggregate {
  #         task_id => "%{type}%{my_task_id}"
  #       }
  #     }
  config :task_id, :validate => :string, :required => true

  # The code to execute to update the map, using the current event.
  #
  # Or, on the contrary, the code to execute to update the event, using the current map.
  #
  # You will have a 'map' variable and an 'event' variable available (that is the event itself).
  #
  # Example:
  # [source,ruby]
  #     filter {
  #       aggregate {
  #         code => "map['sql_duration'] += event.get('duration')"
  #       }
  #     }
  config :code, :validate => :string, :required => true

  # Tell the filter what to do with the aggregate map.
  #
  # `"create"`: create the map, and execute the code only if the map wasn't created before
  #
  # `"update"`: doesn't create the map, and execute the code only if the map was created before
  #
  # `"create_or_update"`: create the map if it wasn't created before, execute the code in all cases
  config :map_action, :validate => :string, :default => "create_or_update"

  # Tell the filter that the task is ended, and therefore, to delete the aggregate map after code execution.
  config :end_of_task, :validate => :boolean, :default => false

  # The path to the file where aggregate maps are stored when Logstash stops
  # and are loaded from when Logstash starts.
  #
  # If not defined, aggregate maps will not be stored when Logstash stops and will be lost.
  # Must be defined in only one aggregate filter (as aggregate maps are global).
  #
  # Example:
  # [source,ruby]
  #     filter {
  #       aggregate {
  #         aggregate_maps_path => "/path/to/.aggregate_maps"
  #       }
  #     }
  config :aggregate_maps_path, :validate => :string, :required => false

  # The number of seconds after which a task's "end event" can be considered lost.
  #
  # When a timeout occurs for a task, its "map" is evicted.
  #
  # Timeout can be defined for each "task_id" pattern.
  #
  # If no timeout is defined, a default timeout of 1800 seconds is applied.
  config :timeout, :validate => :number, :required => false

  # The code to execute to complete the timeout-generated event, when `'push_map_as_event_on_timeout'` or `'push_previous_map_as_event'` is set to true.
  # The code block will have access to the newly generated timeout event that is pre-populated with the aggregation map.
  #
  # If `'timeout_task_id_field'` is set, the event is also populated with the task_id value.
  #
  # Example:
  # [source,ruby]
  #     filter {
  #       aggregate {
  #         timeout_code => "event.set('state', 'timeout')"
  #       }
  #     }
  config :timeout_code, :validate => :string, :required => false

  # When this option is enabled, each time a task timeout is detected, the task aggregation map is pushed as a new Logstash event.
  # This makes it possible to detect and process task timeouts in Logstash, and also to manage tasks that have no explicit end event.
  config :push_map_as_event_on_timeout, :validate => :boolean, :required => false, :default => false

  # When this option is enabled, each time the aggregate plugin detects a new task id, it pushes the previous aggregate map as a new Logstash event,
  # and then creates a new empty map for the next task.
  #
  # WARNING: this option works fine only if tasks come one after the other, meaning: all task1 events, then all task2 events, etc...
  config :push_previous_map_as_event, :validate => :boolean, :required => false, :default => false

  # This option indicates the timeout-generated event's field in which to store the "task_id" value.
  # The task id will then be set into the timeout event. This can help correlate which tasks have been timed out.
  #
  # For example, with the option `timeout_task_id_field => "my_id"`, when the timeout task id is `"12345"`, the generated timeout event will contain `'my_id' => '12345'`.
  #
  # By default, if this option is not set, the task id value won't be set into the timeout-generated event.
  config :timeout_task_id_field, :validate => :string, :required => false

  # Defines tags to add when a timeout event is generated and yielded
  #
  # Example:
  # [source,ruby]
  #     filter {
  #       aggregate {
  #         timeout_tags => ["aggregate_timeout"]
  #       }
  #     }
  config :timeout_tags, :validate => :array, :required => false, :default => []


  # ################ #
  # STATIC VARIABLES #
  # ################ #


  # Default timeout (in seconds) when not defined in plugin configuration
  DEFAULT_TIMEOUT = 1800

  # This is the state of the filter.
  # For each entry, key is "task_id" and value is a map freely updatable by 'code' config
  @@aggregate_maps = {}

  # Mutex used to synchronize access to 'aggregate_maps'
  @@mutex = Mutex.new

  # Default timeout for task_id patterns where timeout is not defined in Logstash filter configuration
  @@default_timeout = nil

  # For each "task_id" pattern, defines which Aggregate instance will process the flush() call, processing expired Aggregate elements (older than timeout)
  # For each entry, key is "task_id pattern" and value is "aggregate instance"
  @@flush_instance_map = {}

  # last time the timeout management in the flush() method was launched, per "task_id" pattern
  @@last_flush_timestamp_map = {}

  # flag indicating if the aggregate_maps_path option has already been set on one aggregate instance
  @@aggregate_maps_path_set = false

  # defines which Aggregate instance will close the Aggregate static variables
  @@static_close_instance = nil


  # ####### #
  # METHODS #
  # ####### #


  # Initialize plugin
  public
  def register

    @logger.debug("Aggregate register call", :code => @code)

    # validate task_id option
    if !@task_id.match(/%\{.+\}/)
      raise LogStash::ConfigurationError, "Aggregate plugin: task_id pattern '#{@task_id}' must contain a dynamic expression like '%{field}'"
    end

    # process lambda expression to call in each filter call
    eval("@codeblock = lambda { |event, map| #{@code} }", binding, "(aggregate filter code)")

    # process lambda expression to call in the timeout case or previous event case
    if @timeout_code
      eval("@timeout_codeblock = lambda { |event| #{@timeout_code} }", binding, "(aggregate filter timeout code)")
    end

    @@mutex.synchronize do

      # timeout management : define eviction_instance for current task_id pattern
      if has_timeout_options?
        if @@flush_instance_map.has_key?(@task_id)
          # all timeout options have to be defined in only one aggregate filter per task_id pattern
          raise LogStash::ConfigurationError, "Aggregate plugin: For task_id pattern '#{@task_id}', there are more than one filter which defines timeout options. All timeout options have to be defined in only one aggregate filter per task_id pattern. Timeout options are : #{display_timeout_options}"
        end
        @@flush_instance_map[@task_id] = self
        @logger.debug("Aggregate timeout for '#{@task_id}' pattern: #{@timeout} seconds")
      end

      # timeout management : define default_timeout
      if !@timeout.nil? && (@@default_timeout.nil? || @timeout < @@default_timeout)
        @@default_timeout = @timeout
        @logger.debug("Aggregate default timeout: #{@timeout} seconds")
      end

      # reinit static_close_instance (if necessary)
      if !@@aggregate_maps_path_set && !@@static_close_instance.nil?
        @@static_close_instance = nil
      end

      # check if aggregate_maps_path option has already been set on another instance, else set @@aggregate_maps_path_set
      if !@aggregate_maps_path.nil?
        if @@aggregate_maps_path_set
          @@aggregate_maps_path_set = false
          raise LogStash::ConfigurationError, "Aggregate plugin: Option 'aggregate_maps_path' must be set on only one aggregate filter"
        else
          @@aggregate_maps_path_set = true
          @@static_close_instance = self
        end
      end

      # load aggregate maps from file (if option defined)
      if !@aggregate_maps_path.nil? && File.exist?(@aggregate_maps_path)
        File.open(@aggregate_maps_path, "r") { |from_file| @@aggregate_maps.merge!(Marshal.load(from_file)) }
        File.delete(@aggregate_maps_path)
        @logger.info("Aggregate maps loaded from : #{@aggregate_maps_path}")
      end

      # init aggregate_maps
      @@aggregate_maps[@task_id] ||= {}
    end
  end

  # Called when Logstash stops
  public
  def close

    @logger.debug("Aggregate close call", :code => @code)

    # define static close instance if none is already defined
    @@static_close_instance = self if @@static_close_instance.nil?

    if @@static_close_instance == self
      # store aggregate maps to file (if option defined)
      @@mutex.synchronize do
        @@aggregate_maps.delete_if { |key, value| value.empty? }
        if !@aggregate_maps_path.nil? && !@@aggregate_maps.empty?
          File.open(@aggregate_maps_path, "w"){ |to_file| Marshal.dump(@@aggregate_maps, to_file) }
          @logger.info("Aggregate maps stored to : #{@aggregate_maps_path}")
        end
        @@aggregate_maps.clear()
      end

      # reinit static variables for Logstash reload
      @@default_timeout = nil
      @@flush_instance_map = {}
      @@last_flush_timestamp_map = {}
      @@aggregate_maps_path_set = false
    end

  end

  # This method is invoked each time an event matches the filter
  public
  def filter(event)

    # define task id
    task_id = event.sprintf(@task_id)
    return if task_id.nil? || task_id == @task_id

    noError = false
    event_to_yield = nil

    # protect aggregate_maps against concurrent access, using a mutex
    @@mutex.synchronize do

      # retrieve the current aggregate map
      aggregate_maps_element = @@aggregate_maps[@task_id][task_id]

      # create aggregate map, if it doesn't exist
      if aggregate_maps_element.nil?
        return if @map_action == "update"
        # create new event from previous map, if @push_previous_map_as_event is enabled
        if @push_previous_map_as_event && !@@aggregate_maps[@task_id].empty?
          event_to_yield = extract_previous_map_as_event()
        end
        aggregate_maps_element = LogStash::Filters::Aggregate::Element.new(Time.now)
        @@aggregate_maps[@task_id][task_id] = aggregate_maps_element
      else
        return if @map_action == "create"
      end
      map = aggregate_maps_element.map

      # execute the code to read/update map and event
      begin
        @codeblock.call(event, map)
        @logger.debug("Aggregate successful filter code execution", :code => @code)
        noError = true
      rescue => exception
        @logger.error("Aggregate exception occurred",
                      :error => exception,
                      :code => @code,
                      :map => map,
                      :event_data => event.to_hash_with_metadata)
        event.tag("_aggregateexception")
      end

      # delete the map if task is ended
      @@aggregate_maps[@task_id].delete(task_id) if @end_of_task

    end

    # match the filter, only if no error occurred
    filter_matched(event) if noError

    # yield previous map as new event if set
    yield event_to_yield unless event_to_yield.nil?

  end

  # Create a new event from the aggregation_map and the corresponding task_id
  # This will create the event and
  # if @timeout_task_id_field is set, it will set the task_id on the timeout event
  # if @timeout_code is set, it will execute the timeout code on the created timeout event
  # returns the newly created event
  def create_timeout_event(aggregation_map, task_id)

    @logger.debug("Aggregate create_timeout_event call with task_id '#{task_id}'")

    event_to_yield = LogStash::Event.new(aggregation_map)

    if @timeout_task_id_field
      event_to_yield.set(@timeout_task_id_field, task_id)
    end

    LogStash::Util::Decorators.add_tags(@timeout_tags, event_to_yield, "filters/#{self.class.name}")

    # Call code block if available
    if @timeout_code
      begin
        @timeout_codeblock.call(event_to_yield)
      rescue => exception
        @logger.error("Aggregate exception occurred",
                      :error => exception,
                      :timeout_code => @timeout_code,
                      :timeout_event_data => event_to_yield.to_hash_with_metadata)
        event_to_yield.tag("_aggregateexception")
      end
    end

    return event_to_yield
  end

  # Extract the previous map in aggregate maps, and return it as a new Logstash event
  def extract_previous_map_as_event
    previous_entry = @@aggregate_maps[@task_id].shift()
    previous_task_id = previous_entry[0]
    previous_map = previous_entry[1].map
    return create_timeout_event(previous_map, previous_task_id)
  end

  # Necessary to indicate to Logstash to periodically call the 'flush' method
  def periodic_flush
    true
  end

  # This method is invoked by Logstash every 5 seconds.
  def flush(options = {})

    @logger.debug("Aggregate flush call with #{options}")

    # Protection against no timeout defined in the Logstash conf: define a default eviction instance with timeout = DEFAULT_TIMEOUT seconds
    if @@default_timeout.nil?
      @@default_timeout = DEFAULT_TIMEOUT
    end
    if !@@flush_instance_map.has_key?(@task_id)
      @@flush_instance_map[@task_id] = self
      @timeout = @@default_timeout
    elsif @@flush_instance_map[@task_id].timeout.nil?
      @@flush_instance_map[@task_id].timeout = @@default_timeout
    end

    # Launch timeout management only every interval of (@timeout / 2) seconds or at Logstash shutdown
    if @@flush_instance_map[@task_id] == self && (!@@last_flush_timestamp_map.has_key?(@task_id) || Time.now > @@last_flush_timestamp_map[@task_id] + @timeout / 2 || options[:final])
      events_to_flush = remove_expired_maps()

      # at Logstash shutdown, if push_previous_map_as_event is enabled, it's important to force flush (particularly for jdbc input plugin)
      if options[:final] && @push_previous_map_as_event && !@@aggregate_maps[@task_id].empty?
        events_to_flush << extract_previous_map_as_event()
      end

      # tag flushed events, indicating "final flush" special event
      if options[:final]
        events_to_flush.each { |event_to_flush| event_to_flush.tag("_aggregatefinalflush") }
      end

      # update last flush timestamp
      @@last_flush_timestamp_map[@task_id] = Time.now

      # return events to flush into Logstash pipeline
      return events_to_flush
    else
      return []
    end

  end


  # Remove the expired Aggregate maps from @@aggregate_maps if they are older than timeout.
  # If the @push_previous_map_as_event option is set, or @push_map_as_event_on_timeout is set, expired maps are returned as new events to be flushed into the Logstash pipeline.
  def remove_expired_maps()
    events_to_flush = []
    min_timestamp = Time.now - @timeout

    @@mutex.synchronize do

      @logger.debug("Aggregate remove_expired_maps call with '#{@task_id}' pattern and #{@@aggregate_maps[@task_id].length} maps")

      @@aggregate_maps[@task_id].delete_if do |key, element|
        if element.creation_timestamp < min_timestamp
          if @push_previous_map_as_event || @push_map_as_event_on_timeout
            events_to_flush << create_timeout_event(element.map, key)
          end
          next true
        end
        next false
      end
    end

    return events_to_flush
  end

  # return whether this filter instance has any timeout option enabled in the Logstash configuration
  def has_timeout_options?()
    return (
      timeout ||
      timeout_code ||
      push_map_as_event_on_timeout ||
      push_previous_map_as_event ||
      timeout_task_id_field ||
      !timeout_tags.empty?
    )
  end

  # display all possible timeout options
  def display_timeout_options()
    return [
      "timeout",
      "timeout_code",
      "push_map_as_event_on_timeout",
      "push_previous_map_as_event",
      "timeout_task_id_field",
      "timeout_tags"
    ].join(", ")
  end

end # class LogStash::Filters::Aggregate

# Element of "aggregate_maps"
class LogStash::Filters::Aggregate::Element

  attr_accessor :creation_timestamp, :map

  def initialize(creation_timestamp)
    @creation_timestamp = creation_timestamp
    @map = {}
  end
end
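
A note on the `register` method above: it compiles the `code` and `timeout_code` strings into lambdas once, via `eval`, so per-event execution does not re-parse the user's Ruby. A minimal standalone sketch of that technique follows; the `user_code` string and the plain-Hash stand-in for a Logstash event are illustrative, not taken from the plugin.

# Sketch: compile a user-supplied code string into a lambda once,
# then call it cheaply per event -- the same technique register() uses.
user_code = "map['sql_duration'] ||= 0; map['sql_duration'] += event['duration']"

# eval builds the lambda a single time; the third argument names the
# pseudo-file that appears in backtraces if the user code raises.
codeblock = eval("lambda { |event, map| #{user_code} }", binding, "(aggregate filter code)")

map = {}
codeblock.call({ 'duration' => 12 }, map)
codeblock.call({ 'duration' => 34 }, map)
puts map['sql_duration']  # => 46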
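The `aggregate_maps_path` persistence in `close`/`register` is a plain `Marshal` round-trip: dump the static maps hash to the file on shutdown, merge it back and delete the file on startup. A self-contained sketch of that round-trip, under the same approach; the path and map contents here are illustrative.

require 'tmpdir'

# Sketch: dump aggregate maps on shutdown, reload them on startup --
# the Marshal round-trip used with the aggregate_maps_path option.
maps = { "%{taskid}" => { "12345" => { "sql_duration" => 46 } } }
path = File.join(Dir.tmpdir, ".aggregate_maps_demo")

# on close: store the maps
File.open(path, "w") { |to_file| Marshal.dump(maps, to_file) }

# on register: load, merge, then remove the file so maps are consumed once
restored = {}
File.open(path, "r") { |from_file| restored.merge!(Marshal.load(from_file)) }
File.delete(path)

p restored  # => {"%{taskid}"=>{"12345"=>{"sql_duration"=>46}}}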
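Finally, the timeout mechanism in `remove_expired_maps` is a timestamp sweep with `Hash#delete_if`: any element whose creation timestamp is older than `Time.now - timeout` is removed, and optionally collected for flushing. A self-contained sketch of the same sweep; the `Element` struct and the 3-second timeout are illustrative stand-ins.

# Sketch: evict entries older than a timeout, collecting the evicted
# maps -- the same delete_if sweep remove_expired_maps() performs.
Element = Struct.new(:creation_timestamp, :map)

timeout = 3
aggregate_map = {
  "task-1" => Element.new(Time.now - 10, { "sql_duration" => 46 }),  # expired
  "task-2" => Element.new(Time.now,      { "sql_duration" => 12 })   # fresh
}

evicted = []
min_timestamp = Time.now - timeout
aggregate_map.delete_if do |task_id, element|
  if element.creation_timestamp < min_timestamp
    evicted << [task_id, element.map]
    next true   # remove this entry
  end
  next false    # keep this entry
end

p evicted            # => [["task-1", {"sql_duration"=>46}]]
p aggregate_map.keys # => ["task-2"]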