logstash-filter-aggregate 2.0.5 → 2.1.0
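The headline change in 2.1.0 is the new optional `aggregate_maps_path` setting: when it is set, the filter serializes its in-memory aggregate maps to that file in a new `close` method when logstash stops, and loads them back in `register` when logstash starts, so in-flight aggregations survive a restart. The `register` method is also restructured so that eviction setup and map loading happen under the same mutex, and several doc comments are touched up. An illustrative configuration follows the diff below.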
@@ -1,288 +1,316 @@
- # encoding: utf-8
-
- require "logstash/filters/base"
- require "logstash/namespace"
- require "thread"
-
- #
- # The aim of this filter is to aggregate information available among several events (typically log lines) belonging to the same task,
- # and finally push aggregated information into the final task event.
- #
- # You should be very careful to set logstash filter workers to 1 (`-w 1` flag) for this filter to work
- # correctly; otherwise documents
- # may be processed out of sequence and unexpected results will occur.
- #
- # ==== Example #1
- #
- # * with these given logs:
- # [source,ruby]
- # ----------------------------------
- # INFO - 12345 - TASK_START - start
- # INFO - 12345 - SQL - sqlQuery1 - 12
- # INFO - 12345 - SQL - sqlQuery2 - 34
- # INFO - 12345 - TASK_END - end
- # ----------------------------------
- #
- # * you can aggregate "sql duration" for the whole task with this configuration:
- # [source,ruby]
- # ----------------------------------
- # filter {
- #   grok {
- #     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
- #   }
- #
- #   if [logger] == "TASK_START" {
- #     aggregate {
- #       task_id => "%{taskid}"
- #       code => "map['sql_duration'] = 0"
- #       map_action => "create"
- #     }
- #   }
- #
- #   if [logger] == "SQL" {
- #     aggregate {
- #       task_id => "%{taskid}"
- #       code => "map['sql_duration'] += event['duration']"
- #       map_action => "update"
- #     }
- #   }
- #
- #   if [logger] == "TASK_END" {
- #     aggregate {
- #       task_id => "%{taskid}"
- #       code => "event['sql_duration'] = map['sql_duration']"
- #       map_action => "update"
- #       end_of_task => true
- #       timeout => 120
- #     }
- #   }
- # }
- # ----------------------------------
- #
- # * the final event then looks like:
- # [source,ruby]
- # ----------------------------------
- # {
- #   "message" => "INFO - 12345 - TASK_END - end message",
- #   "sql_duration" => 46
- # }
- # ----------------------------------
- #
- # the field `sql_duration` is added and contains the sum of all SQL query durations.
- #
- # ==== Example #2
- #
- # * If you have the same logs as in example #1, but without a start log:
- # [source,ruby]
- # ----------------------------------
- # INFO - 12345 - SQL - sqlQuery1 - 12
- # INFO - 12345 - SQL - sqlQuery2 - 34
- # INFO - 12345 - TASK_END - end
- # ----------------------------------
- #
- # * you can also aggregate "sql duration" with a slightly different configuration:
- # [source,ruby]
- # ----------------------------------
- # filter {
- #   grok {
- #     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
- #   }
- #
- #   if [logger] == "SQL" {
- #     aggregate {
- #       task_id => "%{taskid}"
- #       code => "map['sql_duration'] ||= 0 ; map['sql_duration'] += event['duration']"
- #     }
- #   }
- #
- #   if [logger] == "TASK_END" {
- #     aggregate {
- #       task_id => "%{taskid}"
- #       code => "event['sql_duration'] = map['sql_duration']"
- #       end_of_task => true
- #       timeout => 120
- #     }
- #   }
- # }
- # ----------------------------------
- #
- # * the final event is exactly the same as in example #1
- # * the key point is the "||=" ruby operator. It initializes the 'sql_duration' map entry to 0 only if this map entry is not already initialized
- #
- #
- # ==== How it works
- # * the filter needs a "task_id" to correlate events (log lines) of the same task
- # * at the task beginning, the filter creates a map, attached to the task_id
- # * for each event, you can execute code using 'event' and 'map' (for instance, copy an event field to the map)
- # * in the final event, you can execute a last piece of code (for instance, add map data to the final event)
- # * after the final event, the map attached to the task is deleted
- # * in one filter configuration, it is recommended to define a timeout option to protect the feature against unterminated tasks. It tells the filter to delete expired maps
- # * if no timeout is defined, by default, all maps older than 1800 seconds are automatically deleted
- # * finally, if `code` execution raises an exception, the error is logged and the event is tagged '_aggregateexception'
- #
- #
- # ==== Use Cases
- # * extract some cool metrics from task logs and push them into the final task log event (like in examples #1 and #2)
- # * extract error information from any task log line, and push it into the final task event (to get a final document with all error information, if any)
- # * extract all back-end calls as a list, and push this list into the final task event (to get a task profile)
- # * extract all http headers logged across several lines, and push this list into the final task event (complete http request info)
- # * for every back-end call, collect call details available on several lines, analyse them and finally tag the final back-end call log line (error, timeout, business-warning, ...)
- # * finally, the task id can be any correlation id matching your need: it can be a session id, a file path, ...
- #
- #
- class LogStash::Filters::Aggregate < LogStash::Filters::Base
-
-   config_name "aggregate"
-
-   # The expression defining task ID to correlate logs.
-   #
-   # This value must uniquely identify the task in the system.
-   #
-   # Example value: "%{application}%{my_task_id}"
-   config :task_id, :validate => :string, :required => true
-
-   # The code to execute to update the map, using the current event.
-   #
-   # Or on the contrary, the code to execute to update the event, using the current map.
-   #
-   # You will have a 'map' variable and an 'event' variable available (that is the event itself).
-   #
-   # Example value: "map['sql_duration'] += event['duration']"
-   config :code, :validate => :string, :required => true
-
-   # Tell the filter what to do with the aggregate map.
-   #
-   # `create`: create the map, and execute the code only if the map wasn't created before
-   #
-   # `update`: doesn't create the map, and executes the code only if the map was created before
-   #
-   # `create_or_update`: create the map if it wasn't created before, execute the code in all cases
-   config :map_action, :validate => :string, :default => "create_or_update"
-
-   # Tell the filter that the task is ended, and therefore, to delete the map after code execution.
-   config :end_of_task, :validate => :boolean, :default => false
-
-   # The number of seconds after which a task without an "end event" can be considered lost.
-   #
-   # The task "map" is then evicted.
-   #
-   # Default value (`0`) means no timeout, so no auto eviction.
-   config :timeout, :validate => :number, :required => false, :default => 0
-
-
-   # Default timeout (in seconds) when not defined in plugin configuration
-   DEFAULT_TIMEOUT = 1800
-
-   # This is the state of the filter.
-   # For each entry, key is "task_id" and value is a map freely updatable by 'code' config
-   @@aggregate_maps = {}
-
-   # Mutex used to synchronize access to 'aggregate_maps'
-   @@mutex = Mutex.new
-
-   # Aggregate instance which will evict all zombie Aggregate elements (older than timeout)
-   @@eviction_instance = nil
-
-   # last time eviction was launched
-   @@last_eviction_timestamp = nil
-
-   # Initialize plugin
-   public
-   def register
-     # process lambda expression to call in each filter call
-     eval("@codeblock = lambda { |event, map| #{@code} }", binding, "(aggregate filter code)")
-
-     # define eviction_instance
-     @@mutex.synchronize do
-       if (@timeout > 0 && (@@eviction_instance.nil? || @timeout < @@eviction_instance.timeout))
-         @@eviction_instance = self
-         @logger.info("Aggregate, timeout: #{@timeout} seconds")
-       end
-     end
-   end
-
-
-   # This method is invoked each time an event matches the filter
-   public
-   def filter(event)
-
-     # define task id
-     task_id = event.sprintf(@task_id)
-     return if task_id.nil? || task_id == @task_id
-
-     noError = false
-
-     # protect aggregate_maps against concurrent access, using a mutex
-     @@mutex.synchronize do
-
-       # retrieve the current aggregate map
-       aggregate_maps_element = @@aggregate_maps[task_id]
-       if (aggregate_maps_element.nil?)
-         return if @map_action == "update"
-         aggregate_maps_element = LogStash::Filters::Aggregate::Element.new(Time.now);
-         @@aggregate_maps[task_id] = aggregate_maps_element
-       else
-         return if @map_action == "create"
-       end
-       map = aggregate_maps_element.map
-
-       # execute the code to read/update map and event
-       begin
-         @codeblock.call(event, map)
-         noError = true
-       rescue => exception
-         @logger.error("Aggregate exception occurred. Error: #{exception} ; Code: #{@code} ; Map: #{map} ; EventData: #{event.instance_variable_get('@data')}")
-         event.tag("_aggregateexception")
-       end
-
-       # delete the map if task is ended
-       @@aggregate_maps.delete(task_id) if @end_of_task
-     end
-
-     # match the filter, only if no error occurred
-     filter_matched(event) if noError
-   end
-
-   # Necessary to tell logstash to periodically call the 'flush' method
-   def periodic_flush
-     true
-   end
-
-   # This method is invoked by LogStash every 5 seconds.
-   def flush(options = {})
-     # Protection against no timeout defined in the logstash conf: define a default eviction instance with timeout = DEFAULT_TIMEOUT seconds
-     if (@@eviction_instance.nil?)
-       @@eviction_instance = self
-       @timeout = DEFAULT_TIMEOUT
-     end
-
-     # Launch eviction only every interval of (@timeout / 2) seconds
-     if (@@eviction_instance == self && (@@last_eviction_timestamp.nil? || Time.now > @@last_eviction_timestamp + @timeout / 2))
-       remove_expired_elements()
-       @@last_eviction_timestamp = Time.now
-     end
-
-     return nil
-   end
-
-
-   # Remove the expired Aggregate elements from "aggregate_maps" if they are older than timeout
-   def remove_expired_elements()
-     min_timestamp = Time.now - @timeout
-     @@mutex.synchronize do
-       @@aggregate_maps.delete_if { |key, element| element.creation_timestamp < min_timestamp }
-     end
-   end
-
- end # class LogStash::Filters::Aggregate
-
- # Element of "aggregate_maps"
- class LogStash::Filters::Aggregate::Element
-
-   attr_accessor :creation_timestamp, :map
-
-   def initialize(creation_timestamp)
-     @creation_timestamp = creation_timestamp
-     @map = {}
-   end
- end
+ # encoding: utf-8
+
+ require "logstash/filters/base"
+ require "logstash/namespace"
+ require "thread"
+
+ #
+ # The aim of this filter is to aggregate information available among several events (typically log lines) belonging to the same task,
+ # and finally push aggregated information into the final task event.
+ #
+ # You should be very careful to set logstash filter workers to 1 (`-w 1` flag) for this filter to work
+ # correctly; otherwise documents
+ # may be processed out of sequence and unexpected results will occur.
+ #
+ # ==== Example #1
+ #
+ # * with these given logs:
+ # [source,ruby]
+ # ----------------------------------
+ # INFO - 12345 - TASK_START - start
+ # INFO - 12345 - SQL - sqlQuery1 - 12
+ # INFO - 12345 - SQL - sqlQuery2 - 34
+ # INFO - 12345 - TASK_END - end
+ # ----------------------------------
+ #
+ # * you can aggregate "sql duration" for the whole task with this configuration:
+ # [source,ruby]
+ # ----------------------------------
+ # filter {
+ #   grok {
+ #     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
+ #   }
+ #
+ #   if [logger] == "TASK_START" {
+ #     aggregate {
+ #       task_id => "%{taskid}"
+ #       code => "map['sql_duration'] = 0"
+ #       map_action => "create"
+ #     }
+ #   }
+ #
+ #   if [logger] == "SQL" {
+ #     aggregate {
+ #       task_id => "%{taskid}"
+ #       code => "map['sql_duration'] += event['duration']"
+ #       map_action => "update"
+ #     }
+ #   }
+ #
+ #   if [logger] == "TASK_END" {
+ #     aggregate {
+ #       task_id => "%{taskid}"
+ #       code => "event['sql_duration'] = map['sql_duration']"
+ #       map_action => "update"
+ #       end_of_task => true
+ #       timeout => 120
+ #     }
+ #   }
+ # }
+ # ----------------------------------
+ #
+ # * the final event then looks like:
+ # [source,ruby]
+ # ----------------------------------
+ # {
+ #   "message" => "INFO - 12345 - TASK_END - end message",
+ #   "sql_duration" => 46
+ # }
+ # ----------------------------------
+ #
+ # the field `sql_duration` is added and contains the sum of all SQL query durations.
+ #
+ # ==== Example #2
+ #
+ # * If you have the same logs as in example #1, but without a start log:
+ # [source,ruby]
+ # ----------------------------------
+ # INFO - 12345 - SQL - sqlQuery1 - 12
+ # INFO - 12345 - SQL - sqlQuery2 - 34
+ # INFO - 12345 - TASK_END - end
+ # ----------------------------------
+ #
+ # * you can also aggregate "sql duration" with a slightly different configuration:
+ # [source,ruby]
+ # ----------------------------------
+ # filter {
+ #   grok {
+ #     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
+ #   }
+ #
+ #   if [logger] == "SQL" {
+ #     aggregate {
+ #       task_id => "%{taskid}"
+ #       code => "map['sql_duration'] ||= 0 ; map['sql_duration'] += event['duration']"
+ #     }
+ #   }
+ #
+ #   if [logger] == "TASK_END" {
+ #     aggregate {
+ #       task_id => "%{taskid}"
+ #       code => "event['sql_duration'] = map['sql_duration']"
+ #       end_of_task => true
+ #       timeout => 120
+ #     }
+ #   }
+ # }
+ # ----------------------------------
+ #
+ # * the final event is exactly the same as in example #1
+ # * the key point is the "||=" ruby operator. It initializes the 'sql_duration' map entry to 0 only if this map entry is not already initialized
+ #
+ #
+ # ==== How it works
+ # * the filter needs a "task_id" to correlate events (log lines) of the same task
+ # * at the task beginning, the filter creates a map, attached to the task_id
+ # * for each event, you can execute code using 'event' and 'map' (for instance, copy an event field to the map)
+ # * in the final event, you can execute a last piece of code (for instance, add map data to the final event)
+ # * after the final event, the map attached to the task is deleted
+ # * in one filter configuration, it is recommended to define a timeout option to protect the feature against unterminated tasks. It tells the filter to delete expired maps
+ # * if no timeout is defined, by default, all maps older than 1800 seconds are automatically deleted
+ # * finally, if `code` execution raises an exception, the error is logged and the event is tagged '_aggregateexception'
+ #
+ #
+ # ==== Use Cases
+ # * extract some cool metrics from task logs and push them into the final task log event (like in examples #1 and #2)
+ # * extract error information from any task log line, and push it into the final task event (to get a final document with all error information, if any)
+ # * extract all back-end calls as a list, and push this list into the final task event (to get a task profile)
+ # * extract all http headers logged across several lines, and push this list into the final task event (complete http request info)
+ # * for every back-end call, collect call details available on several lines, analyse them and finally tag the final back-end call log line (error, timeout, business-warning, ...)
+ # * finally, the task id can be any correlation id matching your need: it can be a session id, a file path, ...
+ #
+ #
+ class LogStash::Filters::Aggregate < LogStash::Filters::Base
+
+   config_name "aggregate"
+
+   # The expression defining task ID to correlate logs.
+   #
+   # This value must uniquely identify the task in the system.
+   #
+   # Example value: "%{application}%{my_task_id}"
+   config :task_id, :validate => :string, :required => true
+
+   # The code to execute to update the map, using the current event.
+   #
+   # Or on the contrary, the code to execute to update the event, using the current map.
+   #
+   # You will have a 'map' variable and an 'event' variable available (that is the event itself).
+   #
+   # Example value: `"map['sql_duration'] += event['duration']"`
+   config :code, :validate => :string, :required => true
+
+   # Tell the filter what to do with the aggregate map.
+   #
+   # `create`: create the map, and execute the code only if the map wasn't created before
+   #
+   # `update`: doesn't create the map, and executes the code only if the map was created before
+   #
+   # `create_or_update`: create the map if it wasn't created before, execute the code in all cases
+   config :map_action, :validate => :string, :default => "create_or_update"
+
+   # Tell the filter that the task is ended, and therefore, to delete the map after code execution.
+   config :end_of_task, :validate => :boolean, :default => false
+
+   # The number of seconds after which a task without an "end event" can be considered lost.
+   #
+   # When the timeout occurs for a task, the task "map" is evicted.
+   #
+   # Default value (`0`) means no timeout, so no auto eviction.
+   config :timeout, :validate => :number, :required => false, :default => 0
+
+   # The path to the file where aggregate maps are stored when logstash stops
+   # and are loaded from when logstash starts.
+   #
+   # If not defined, aggregate maps will not be stored when logstash stops and will be lost.
+   # Should be defined for only one aggregate filter (as aggregate maps are global).
+   #
+   # Example value: `"/path/to/.aggregate_maps"`
+   config :aggregate_maps_path, :validate => :string, :required => false
+
+
+   # Default timeout (in seconds) when not defined in plugin configuration
+   DEFAULT_TIMEOUT = 1800
+
+   # This is the state of the filter.
+   # For each entry, key is "task_id" and value is a map freely updatable by 'code' config
+   @@aggregate_maps = {}
+
+   # Mutex used to synchronize access to 'aggregate_maps'
+   @@mutex = Mutex.new
+
+   # Aggregate instance which will evict all zombie Aggregate elements (older than timeout)
+   @@eviction_instance = nil
+
+   # last time eviction was launched
+   @@last_eviction_timestamp = nil
+
+   # Initialize plugin
+   public
+   def register
+     # process lambda expression to call in each filter call
+     eval("@codeblock = lambda { |event, map| #{@code} }", binding, "(aggregate filter code)")
+
+     @@mutex.synchronize do
+       # define eviction_instance
+       if (@timeout > 0 && (@@eviction_instance.nil? || @timeout < @@eviction_instance.timeout))
+         @@eviction_instance = self
+         @logger.info("Aggregate, timeout: #{@timeout} seconds")
+       end
+
+       # load aggregate maps from file (if option defined)
+       if (!@aggregate_maps_path.nil? && File.exist?(@aggregate_maps_path))
+         File.open(@aggregate_maps_path, "r") { |from_file| @@aggregate_maps = Marshal.load(from_file) }
+         File.delete(@aggregate_maps_path)
+         @logger.info("Aggregate, load aggregate maps from : #{@aggregate_maps_path}")
+       end
+     end
+   end
+
+   # Called when logstash stops
+   public
+   def close
+     @@mutex.synchronize do
+       # store aggregate maps to file (if option defined)
+       if (!@aggregate_maps_path.nil? && !@@aggregate_maps.empty?)
+         File.open(@aggregate_maps_path, "w"){ |to_file| Marshal.dump(@@aggregate_maps, to_file) }
+         @@aggregate_maps.clear()
+         @logger.info("Aggregate, store aggregate maps to : #{@aggregate_maps_path}")
+       end
+     end
+   end
+
+   # This method is invoked each time an event matches the filter
+   public
+   def filter(event)
+
+     # define task id
+     task_id = event.sprintf(@task_id)
+     return if task_id.nil? || task_id == @task_id
+
+     noError = false
+
+     # protect aggregate_maps against concurrent access, using a mutex
+     @@mutex.synchronize do
+
+       # retrieve the current aggregate map
+       aggregate_maps_element = @@aggregate_maps[task_id]
+       if (aggregate_maps_element.nil?)
+         return if @map_action == "update"
+         aggregate_maps_element = LogStash::Filters::Aggregate::Element.new(Time.now);
+         @@aggregate_maps[task_id] = aggregate_maps_element
+       else
+         return if @map_action == "create"
+       end
+       map = aggregate_maps_element.map
+
+       # execute the code to read/update map and event
+       begin
+         @codeblock.call(event, map)
+         noError = true
+       rescue => exception
+         @logger.error("Aggregate exception occurred. Error: #{exception} ; Code: #{@code} ; Map: #{map} ; EventData: #{event.instance_variable_get('@data')}")
+         event.tag("_aggregateexception")
+       end
+
+       # delete the map if task is ended
+       @@aggregate_maps.delete(task_id) if @end_of_task
+     end
+
+     # match the filter, only if no error occurred
+     filter_matched(event) if noError
+   end
+
+   # Necessary to tell logstash to periodically call the 'flush' method
+   def periodic_flush
+     true
+   end
+
+   # This method is invoked by LogStash every 5 seconds.
+   def flush(options = {})
+     # Protection against no timeout defined in the logstash conf: define a default eviction instance with timeout = DEFAULT_TIMEOUT seconds
+     if (@@eviction_instance.nil?)
+       @@eviction_instance = self
+       @timeout = DEFAULT_TIMEOUT
+     end
+
+     # Launch eviction only every interval of (@timeout / 2) seconds
+     if (@@eviction_instance == self && (@@last_eviction_timestamp.nil? || Time.now > @@last_eviction_timestamp + @timeout / 2))
+       remove_expired_elements()
+       @@last_eviction_timestamp = Time.now
+     end
+
+     return nil
+   end
+
+
+   # Remove the expired Aggregate elements from "aggregate_maps" if they are older than timeout
+   def remove_expired_elements()
+     min_timestamp = Time.now - @timeout
+     @@mutex.synchronize do
+       @@aggregate_maps.delete_if { |key, element| element.creation_timestamp < min_timestamp }
+     end
+   end
+
+ end # class LogStash::Filters::Aggregate
+
+ # Element of "aggregate_maps"
+ class LogStash::Filters::Aggregate::Element
+
+   attr_accessor :creation_timestamp, :map
+
+   def initialize(creation_timestamp)
+     @creation_timestamp = creation_timestamp
+     @map = {}
+   end
+ end
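For readers who want to try the new persistence feature, here is a minimal sketch of a pipeline that opts in. It reuses the grok pattern and fields from example #2 above; the map file location is a placeholder you would adapt. Note that `aggregate_maps_path` should be set on only one aggregate filter, since the aggregate maps are shared globally.

[source,ruby]
----------------------------------
filter {
  grok {
    match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
  }

  if [logger] == "SQL" {
    aggregate {
      task_id => "%{taskid}"
      code => "map['sql_duration'] ||= 0 ; map['sql_duration'] += event['duration']"
      # placeholder path: maps are dumped here on shutdown and reloaded (then deleted) on startup
      aggregate_maps_path => "/path/to/.aggregate_maps"
    }
  }

  if [logger] == "TASK_END" {
    aggregate {
      task_id => "%{taskid}"
      code => "event['sql_duration'] = map['sql_duration']"
      end_of_task => true
      timeout => 120
    }
  }
}
----------------------------------

On shutdown, `close` writes the `@@aggregate_maps` hash to the file with `Marshal.dump`; on the next startup, `register` reads it back with `Marshal.load` and deletes the file, so a stale snapshot is never reused twice.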