logstash-filter-aggregate 2.0.5 → 2.1.0
- checksums.yaml +4 -4
- data/BUILD.md +81 -81
- data/CHANGELOG.md +28 -24
- data/CONTRIBUTORS +13 -13
- data/Gemfile +2 -2
- data/LICENSE +13 -13
- data/README.md +167 -161
- data/lib/logstash/filters/aggregate.rb +316 -288
- data/logstash-filter-aggregate.gemspec +24 -24
- data/spec/filters/aggregate_spec.rb +210 -185
- data/spec/filters/aggregate_spec_helper.rb +49 -49
- metadata +3 -3

data/lib/logstash/filters/aggregate.rb
@@ -1,288 +1,316 @@
# encoding: utf-8

require "logstash/filters/base"
require "logstash/namespace"
require "thread"

#
# The aim of this filter is to aggregate information available among several events (typically log lines) belonging to the same task,
# and finally push aggregated information into the final task event.
#
# You should be very careful to set logstash filter workers to 1 (`-w 1` flag) for this filter to work
# correctly; otherwise, documents may be processed out of sequence and unexpected results will occur.
#
# ==== Example #1
#
# * given these logs:
# [source,ruby]
# ----------------------------------
# INFO - 12345 - TASK_START - start
# INFO - 12345 - SQL - sqlQuery1 - 12
# INFO - 12345 - SQL - sqlQuery2 - 34
# INFO - 12345 - TASK_END - end
# ----------------------------------
#
# * you can aggregate the "sql duration" for the whole task with this configuration:
# [source,ruby]
# ----------------------------------
# filter {
#   grok {
#     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
#   }
#
#   if [logger] == "TASK_START" {
#     aggregate {
#       task_id => "%{taskid}"
#       code => "map['sql_duration'] = 0"
#       map_action => "create"
#     }
#   }
#
#   if [logger] == "SQL" {
#     aggregate {
#       task_id => "%{taskid}"
#       code => "map['sql_duration'] += event['duration']"
#       map_action => "update"
#     }
#   }
#
#   if [logger] == "TASK_END" {
#     aggregate {
#       task_id => "%{taskid}"
#       code => "event['sql_duration'] = map['sql_duration']"
#       map_action => "update"
#       end_of_task => true
#       timeout => 120
#     }
#   }
# }
# ----------------------------------
#
# * the final event then looks like:
# [source,ruby]
# ----------------------------------
# {
#   "message" => "INFO - 12345 - TASK_END - end message",
#   "sql_duration" => 46
# }
# ----------------------------------
#
# the field `sql_duration` is added and contains the sum of all SQL query durations.
#
# ==== Example #2
#
# * if you have the same logs as in example #1, but without a start log:
# [source,ruby]
# ----------------------------------
# INFO - 12345 - SQL - sqlQuery1 - 12
# INFO - 12345 - SQL - sqlQuery2 - 34
# INFO - 12345 - TASK_END - end
# ----------------------------------
#
# * you can still aggregate the "sql duration" with a slightly different configuration:
# [source,ruby]
# ----------------------------------
# filter {
#   grok {
#     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
#   }
#
#   if [logger] == "SQL" {
#     aggregate {
#       task_id => "%{taskid}"
#       code => "map['sql_duration'] ||= 0 ; map['sql_duration'] += event['duration']"
#     }
#   }
#
#   if [logger] == "TASK_END" {
#     aggregate {
#       task_id => "%{taskid}"
#       code => "event['sql_duration'] = map['sql_duration']"
#       end_of_task => true
#       timeout => 120
#     }
#   }
# }
# ----------------------------------
#
# * the final event is exactly the same as in example #1
# * the key point is the "||=" ruby operator: it initializes the 'sql_duration' map entry to 0 only if this entry is not already initialized
#
#
# ==== How it works
# * the filter needs a "task_id" to correlate events (log lines) of the same task
# * at the task beginning, the filter creates a map attached to the task_id
# * for each event, you can execute code using 'event' and 'map' (for instance, copy an event field to the map)
# * on the final event, you can execute one last piece of code (for instance, add map data to the final event)
# * after the final event, the map attached to the task is deleted
# * in one filter configuration, it is recommended to define a timeout option to protect the feature against unterminated tasks: it tells the filter to delete expired maps
# * if no timeout is defined, by default, all maps older than 1800 seconds are automatically deleted
# * finally, if `code` execution raises an exception, the error is logged and the event is tagged '_aggregateexception'
#
#
# ==== Use Cases
# * extract some cool metrics from task logs and push them into the final task log event (like in examples #1 and #2)
# * extract error information from any task log line and push it into the final task event (to get a final document with all error information, if any)
# * extract all back-end calls as a list and push this list into the final task event (to get a task profile)
# * extract all http headers logged on several lines and push this list into the final task event (complete http request info)
# * for every back-end call, collect call details available on several lines, analyse them, and finally tag the final back-end call log line (error, timeout, business-warning, ...)
# * finally, the task id can be any correlation id matching your needs: a session id, a file path, ...
#
#
class LogStash::Filters::Aggregate < LogStash::Filters::Base

  config_name "aggregate"

  # The expression defining the task ID used to correlate logs.
  #
  # This value must uniquely identify the task in the system.
  #
  # Example value: "%{application}%{my_task_id}"
  config :task_id, :validate => :string, :required => true

  # The code to execute to update the map, using the current event.
  #
  # Or, conversely, the code to execute to update the event, using the current map.
  #
  # You will have a 'map' variable and an 'event' variable available (that is the event itself).
  #
  # Example value: `"map['sql_duration'] += event['duration']"`
  config :code, :validate => :string, :required => true

  # Tell the filter what to do with the aggregate map.
  #
  # `create`: create the map, and execute the code only if the map wasn't created before
  #
  # `update`: don't create the map, and execute the code only if the map was created before
  #
  # `create_or_update`: create the map if it wasn't created before, and execute the code in all cases
  config :map_action, :validate => :string, :default => "create_or_update"

  # Tell the filter that the task is ended, and therefore to delete the map after code execution.
  config :end_of_task, :validate => :boolean, :default => false

  # The number of seconds after which a task's "end event" can be considered lost.
  #
  # When the timeout occurs for a task, its map is evicted.
  #
  # The default value (`0`) means no timeout for this filter; expired maps are then
  # evicted using the 1800-second default (see `DEFAULT_TIMEOUT` below).
  config :timeout, :validate => :number, :required => false, :default => 0

  # The path to the file where aggregate maps are stored when logstash stops,
  # and loaded from when logstash starts.
  #
  # If not defined, aggregate maps will not be stored when logstash stops, and will be lost.
  # Should be defined on only one aggregate filter (as aggregate maps are global).
  #
  # Example value: `"/path/to/.aggregate_maps"`
  config :aggregate_maps_path, :validate => :string, :required => false
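
  # A minimal sketch combining the `timeout` and `aggregate_maps_path` options above
  # (illustrative values; the 'taskid' field and the store path are assumptions, not
  # gem defaults). With this config, a task map is evicted once it is older than
  # 300 seconds, and in-flight maps survive a logstash restart through the store file:
  # [source,ruby]
  # ----------------------------------
  # filter {
  #   aggregate {
  #     task_id             => "%{taskid}"
  #     code                => "map['lines'] ||= 0 ; map['lines'] += 1"
  #     timeout             => 300
  #     aggregate_maps_path => "/var/lib/logstash/.aggregate_maps"
  #   }
  # }
  # ----------------------------------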

  # Default timeout (in seconds) when not defined in plugin configuration
  DEFAULT_TIMEOUT = 1800

  # This is the state of the filter.
  # For each entry, the key is the "task_id" and the value is a map freely updatable by the 'code' config
  @@aggregate_maps = {}

  # Mutex used to synchronize access to 'aggregate_maps'
  @@mutex = Mutex.new

  # Aggregate instance which will evict all zombie Aggregate elements (older than timeout)
  @@eviction_instance = nil

  # last time eviction was launched
  @@last_eviction_timestamp = nil

  # Initialize plugin
  public
  def register
    # process lambda expression to call in each filter call
    eval("@codeblock = lambda { |event, map| #{@code} }", binding, "(aggregate filter code)")

    @@mutex.synchronize do
      # define eviction_instance
      if (@timeout > 0 && (@@eviction_instance.nil? || @timeout < @@eviction_instance.timeout))
        @@eviction_instance = self
        @logger.info("Aggregate, timeout: #{@timeout} seconds")
      end

      # load aggregate maps from file (if option defined)
      if (!@aggregate_maps_path.nil? && File.exist?(@aggregate_maps_path))
        File.open(@aggregate_maps_path, "r") { |from_file| @@aggregate_maps = Marshal.load(from_file) }
        File.delete(@aggregate_maps_path)
        @logger.info("Aggregate, load aggregate maps from : #{@aggregate_maps_path}")
      end
    end
  end

  # Called when logstash stops
  public
  def close
    @@mutex.synchronize do
      # store aggregate maps to file (if option defined)
      if (!@aggregate_maps_path.nil? && !@@aggregate_maps.empty?)
        File.open(@aggregate_maps_path, "w") { |to_file| Marshal.dump(@@aggregate_maps, to_file) }
        @@aggregate_maps.clear
        @logger.info("Aggregate, store aggregate maps to : #{@aggregate_maps_path}")
      end
    end
  end

  # This method is invoked each time an event matches the filter
  public
  def filter(event)

    # define task id
    task_id = event.sprintf(@task_id)
    return if task_id.nil? || task_id == @task_id

    noError = false

    # protect aggregate_maps against concurrent access, using a mutex
    @@mutex.synchronize do

      # retrieve the current aggregate map
      aggregate_maps_element = @@aggregate_maps[task_id]
      if (aggregate_maps_element.nil?)
        return if @map_action == "update"
        aggregate_maps_element = LogStash::Filters::Aggregate::Element.new(Time.now)
        @@aggregate_maps[task_id] = aggregate_maps_element
      else
        return if @map_action == "create"
      end
      map = aggregate_maps_element.map

      # execute the code to read/update map and event
      begin
        @codeblock.call(event, map)
        noError = true
      rescue => exception
        @logger.error("Aggregate exception occurred. Error: #{exception} ; Code: #{@code} ; Map: #{map} ; EventData: #{event.instance_variable_get('@data')}")
        event.tag("_aggregateexception")
      end

      # delete the map if the task is ended
      @@aggregate_maps.delete(task_id) if @end_of_task
    end

    # mark the filter as matched, only if no error occurred
    filter_matched(event) if noError
  end

  # Necessary to indicate to logstash that the 'flush' method must be called periodically
  def periodic_flush
    true
  end

  # This method is invoked by LogStash every 5 seconds.
  def flush(options = {})
    # Protection against a configuration with no timeout defined: define a default eviction instance with timeout = DEFAULT_TIMEOUT seconds
    if (@@eviction_instance.nil?)
      @@eviction_instance = self
      @timeout = DEFAULT_TIMEOUT
    end

    # Launch eviction only once every (@timeout / 2) seconds
    if (@@eviction_instance == self && (@@last_eviction_timestamp.nil? || Time.now > @@last_eviction_timestamp + @timeout / 2))
      remove_expired_elements()
      @@last_eviction_timestamp = Time.now
    end

    return nil
  end

  # Remove expired Aggregate elements (older than timeout) from "aggregate_maps"
  def remove_expired_elements()
    min_timestamp = Time.now - @timeout
    @@mutex.synchronize do
      @@aggregate_maps.delete_if { |key, element| element.creation_timestamp < min_timestamp }
    end
  end

end # class LogStash::Filters::Aggregate

# Element of "aggregate_maps"
class LogStash::Filters::Aggregate::Element

  attr_accessor :creation_timestamp, :map

  def initialize(creation_timestamp)
    @creation_timestamp = creation_timestamp
    @map = {}
  end
end
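
For reference, the persistence added in this version (`register` loads, `close` stores) is a plain `Marshal` round-trip of the class-level map table. A minimal standalone sketch of that cycle, with a hypothetical path and demo content:

[source,ruby]
----------------------------------
# Minimal sketch of the aggregate_maps_path store/load cycle.
# The path and the demo map below are hypothetical, not gem defaults.
path = "/tmp/.aggregate_maps_demo"
maps = { "task-12345" => { "sql_duration" => 46 } }

# what `close` does: serialize the whole map table to the configured file
File.open(path, "w") { |to_file| Marshal.dump(maps, to_file) }

# what `register` does on the next start: load the table back,
# then delete the file so stale state is not reloaded twice
restored = File.open(path, "r") { |from_file| Marshal.load(from_file) }
File.delete(path)

restored["task-12345"]["sql_duration"] # => 46
----------------------------------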