logstash-filter-aggregate 2.0.5 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,288 +1,316 @@
- # encoding: utf-8
-
- require "logstash/filters/base"
- require "logstash/namespace"
- require "thread"
-
- #
- # The aim of this filter is to aggregate information available among several events (typically log lines) belonging to the same task,
- # and finally push the aggregated information into the final task event.
- #
- # You should be very careful to set Logstash filter workers to 1 (`-w 1` flag) for this filter to work
- # correctly; otherwise documents
- # may be processed out of sequence and unexpected results will occur.
- #
- # ==== Example #1
- #
- # * given these logs:
- # [source,ruby]
- # ----------------------------------
- # INFO - 12345 - TASK_START - start
- # INFO - 12345 - SQL - sqlQuery1 - 12
- # INFO - 12345 - SQL - sqlQuery2 - 34
- # INFO - 12345 - TASK_END - end
- # ----------------------------------
- #
- # * you can aggregate the "sql duration" for the whole task with this configuration:
- # [source,ruby]
- # ----------------------------------
- # filter {
- #   grok {
- #     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
- #   }
- #
- #   if [logger] == "TASK_START" {
- #     aggregate {
- #       task_id => "%{taskid}"
- #       code => "map['sql_duration'] = 0"
- #       map_action => "create"
- #     }
- #   }
- #
- #   if [logger] == "SQL" {
- #     aggregate {
- #       task_id => "%{taskid}"
- #       code => "map['sql_duration'] += event['duration']"
- #       map_action => "update"
- #     }
- #   }
- #
- #   if [logger] == "TASK_END" {
- #     aggregate {
- #       task_id => "%{taskid}"
- #       code => "event['sql_duration'] = map['sql_duration']"
- #       map_action => "update"
- #       end_of_task => true
- #       timeout => 120
- #     }
- #   }
- # }
- # ----------------------------------
- #
- # * the final event then looks like:
- # [source,ruby]
- # ----------------------------------
- # {
- #   "message" => "INFO - 12345 - TASK_END - end message",
- #   "sql_duration" => 46
- # }
- # ----------------------------------
- #
- # the field `sql_duration` is added and contains the sum of all SQL query durations.
- #
- # ==== Example #2
- #
- # * If you have the same logs as in example #1, but without a start log:
- # [source,ruby]
- # ----------------------------------
- # INFO - 12345 - SQL - sqlQuery1 - 12
- # INFO - 12345 - SQL - sqlQuery2 - 34
- # INFO - 12345 - TASK_END - end
- # ----------------------------------
- #
- # * you can also aggregate the "sql duration" with a slightly different configuration:
- # [source,ruby]
- # ----------------------------------
- # filter {
- #   grok {
- #     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
- #   }
- #
- #   if [logger] == "SQL" {
- #     aggregate {
- #       task_id => "%{taskid}"
- #       code => "map['sql_duration'] ||= 0 ; map['sql_duration'] += event['duration']"
- #     }
- #   }
- #
- #   if [logger] == "TASK_END" {
- #     aggregate {
- #       task_id => "%{taskid}"
- #       code => "event['sql_duration'] = map['sql_duration']"
- #       end_of_task => true
- #       timeout => 120
- #     }
- #   }
- # }
- # ----------------------------------
- #
- # * the final event is exactly the same as in example #1
- # * the key point is the `||=` Ruby operator, which initializes the 'sql_duration' map entry to 0 only if the entry is not already initialized
- #
- #
- # ==== How it works
- # * the filter needs a "task_id" to correlate the events (log lines) of the same task
- # * at the beginning of the task, the filter creates a map attached to the task_id
- # * for each event, you can execute code using 'event' and 'map' (for instance, copy an event field to the map)
- # * on the final event, you can execute a last piece of code (for instance, add map data to the final event)
- # * after the final event, the map attached to the task is deleted
- # * in one filter configuration, it is recommended to define a timeout option to protect the feature against unterminated tasks; it tells the filter to delete expired maps
- # * if no timeout is defined, by default, all maps older than 1800 seconds are automatically deleted
- # * finally, if `code` execution raises an exception, the error is logged and the event is tagged '_aggregateexception'
- #
- #
- # ==== Use Cases
- # * extract some useful metrics from task logs and push them into the final task log event (as in examples #1 and #2)
- # * extract error information from any task log line and push it into the final task event (to get a final document with all error information, if any)
- # * extract all back-end calls as a list and push this list into the final task event (to get a task profile)
- # * extract all http headers logged across several lines and push this list into the final task event (complete http request info)
- # * for every back-end call, collect the call details available across several lines, analyse them and finally tag the final back-end call log line (error, timeout, business-warning, ...)
- # * Finally, the task id can be any correlation id matching your needs: it can be a session id, a file path, ...
- #
- #
- class LogStash::Filters::Aggregate < LogStash::Filters::Base
-
-   config_name "aggregate"
-
-   # The expression defining the task ID used to correlate logs.
-   #
-   # This value must uniquely identify the task in the system.
-   #
-   # Example value: "%{application}%{my_task_id}"
-   config :task_id, :validate => :string, :required => true
-
-   # The code to execute to update the map, using the current event.
-   #
-   # Or, conversely, the code to execute to update the event, using the current map.
-   #
-   # You will have a 'map' variable and an 'event' variable available (that is, the event itself).
-   #
-   # Example value: "map['sql_duration'] += event['duration']"
-   config :code, :validate => :string, :required => true
-
-   # Tell the filter what to do with the aggregate map.
-   #
-   # `create`: creates the map, and executes the code only if the map wasn't created before
-   #
-   # `update`: doesn't create the map, and executes the code only if the map was created before
-   #
-   # `create_or_update`: creates the map if it wasn't created before, and executes the code in all cases
-   config :map_action, :validate => :string, :default => "create_or_update"
-
-   # Tell the filter that the task has ended and, therefore, that the map should be deleted after code execution.
-   config :end_of_task, :validate => :boolean, :default => false
-
-   # The number of seconds after which a task "end event" can be considered lost.
-   #
-   # The task "map" is evicted.
-   #
-   # The default value (`0`) means no timeout, so no auto eviction.
-   config :timeout, :validate => :number, :required => false, :default => 0
-
-
-   # Default timeout (in seconds) when not defined in the plugin configuration
-   DEFAULT_TIMEOUT = 1800
-
-   # This is the state of the filter.
-   # For each entry, the key is the "task_id" and the value is a map freely updatable by the 'code' config
-   @@aggregate_maps = {}
-
-   # Mutex used to synchronize access to 'aggregate_maps'
-   @@mutex = Mutex.new
-
-   # Aggregate instance which will evict all zombie Aggregate elements (older than timeout)
-   @@eviction_instance = nil
-
-   # last time eviction was launched
-   @@last_eviction_timestamp = nil
-
-   # Initialize plugin
-   public
-   def register
-     # process the lambda expression to call in each filter call
-     eval("@codeblock = lambda { |event, map| #{@code} }", binding, "(aggregate filter code)")
-
-     # define eviction_instance
-     @@mutex.synchronize do
-       if (@timeout > 0 && (@@eviction_instance.nil? || @timeout < @@eviction_instance.timeout))
-         @@eviction_instance = self
-         @logger.info("Aggregate, timeout: #{@timeout} seconds")
-       end
-     end
-   end
-
-
-   # This method is invoked each time an event matches the filter
-   public
-   def filter(event)
-
-     # define the task id
-     task_id = event.sprintf(@task_id)
-     return if task_id.nil? || task_id == @task_id
-
-     noError = false
-
-     # protect aggregate_maps against concurrent access, using a mutex
-     @@mutex.synchronize do
-
-       # retrieve the current aggregate map
-       aggregate_maps_element = @@aggregate_maps[task_id]
-       if (aggregate_maps_element.nil?)
-         return if @map_action == "update"
-         aggregate_maps_element = LogStash::Filters::Aggregate::Element.new(Time.now);
-         @@aggregate_maps[task_id] = aggregate_maps_element
-       else
-         return if @map_action == "create"
-       end
-       map = aggregate_maps_element.map
-
-       # execute the code to read/update the map and the event
-       begin
-         @codeblock.call(event, map)
-         noError = true
-       rescue => exception
-         @logger.error("Aggregate exception occurred. Error: #{exception} ; Code: #{@code} ; Map: #{map} ; EventData: #{event.instance_variable_get('@data')}")
-         event.tag("_aggregateexception")
-       end
-
-       # delete the map if the task has ended
-       @@aggregate_maps.delete(task_id) if @end_of_task
-     end
-
-     # match the filter, only if no error occurred
-     filter_matched(event) if noError
-   end
-
-   # Necessary to tell Logstash to periodically call the 'flush' method
-   def periodic_flush
-     true
-   end
-
-   # This method is invoked by Logstash every 5 seconds.
-   def flush(options = {})
-     # Protection against no timeout defined in the Logstash conf: define a default eviction instance with timeout = DEFAULT_TIMEOUT seconds
-     if (@@eviction_instance.nil?)
-       @@eviction_instance = self
-       @timeout = DEFAULT_TIMEOUT
-     end
-
-     # Launch eviction only once every (@timeout / 2) seconds
-     if (@@eviction_instance == self && (@@last_eviction_timestamp.nil? || Time.now > @@last_eviction_timestamp + @timeout / 2))
-       remove_expired_elements()
-       @@last_eviction_timestamp = Time.now
-     end
-
-     return nil
-   end
-
-
-   # Remove expired Aggregate elements from "aggregate_maps" if they are older than the timeout
-   def remove_expired_elements()
-     min_timestamp = Time.now - @timeout
-     @@mutex.synchronize do
-       @@aggregate_maps.delete_if { |key, element| element.creation_timestamp < min_timestamp }
-     end
-   end
-
- end # class LogStash::Filters::Aggregate
-
- # Element of "aggregate_maps"
- class LogStash::Filters::Aggregate::Element
-
-   attr_accessor :creation_timestamp, :map
-
-   def initialize(creation_timestamp)
-     @creation_timestamp = creation_timestamp
-     @map = {}
-   end
- end
+ # encoding: utf-8
+
+ require "logstash/filters/base"
+ require "logstash/namespace"
+ require "thread"
+
+ #
+ # The aim of this filter is to aggregate information available among several events (typically log lines) belonging to the same task,
+ # and finally push the aggregated information into the final task event.
+ #
+ # You should be very careful to set Logstash filter workers to 1 (`-w 1` flag) for this filter to work
+ # correctly; otherwise documents
+ # may be processed out of sequence and unexpected results will occur.
+ #
+ # ==== Example #1
+ #
+ # * given these logs:
+ # [source,ruby]
+ # ----------------------------------
+ # INFO - 12345 - TASK_START - start
+ # INFO - 12345 - SQL - sqlQuery1 - 12
+ # INFO - 12345 - SQL - sqlQuery2 - 34
+ # INFO - 12345 - TASK_END - end
+ # ----------------------------------
+ #
+ # * you can aggregate the "sql duration" for the whole task with this configuration:
+ # [source,ruby]
+ # ----------------------------------
+ # filter {
+ #   grok {
+ #     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
+ #   }
+ #
+ #   if [logger] == "TASK_START" {
+ #     aggregate {
+ #       task_id => "%{taskid}"
+ #       code => "map['sql_duration'] = 0"
+ #       map_action => "create"
+ #     }
+ #   }
+ #
+ #   if [logger] == "SQL" {
+ #     aggregate {
+ #       task_id => "%{taskid}"
+ #       code => "map['sql_duration'] += event['duration']"
+ #       map_action => "update"
+ #     }
+ #   }
+ #
+ #   if [logger] == "TASK_END" {
+ #     aggregate {
+ #       task_id => "%{taskid}"
+ #       code => "event['sql_duration'] = map['sql_duration']"
+ #       map_action => "update"
+ #       end_of_task => true
+ #       timeout => 120
+ #     }
+ #   }
+ # }
+ # ----------------------------------
+ #
+ # * the final event then looks like:
+ # [source,ruby]
+ # ----------------------------------
+ # {
+ #   "message" => "INFO - 12345 - TASK_END - end message",
+ #   "sql_duration" => 46
+ # }
+ # ----------------------------------
+ #
+ # the field `sql_duration` is added and contains the sum of all SQL query durations.
+ #
+ # ==== Example #2
+ #
+ # * If you have the same logs as in example #1, but without a start log:
+ # [source,ruby]
+ # ----------------------------------
+ # INFO - 12345 - SQL - sqlQuery1 - 12
+ # INFO - 12345 - SQL - sqlQuery2 - 34
+ # INFO - 12345 - TASK_END - end
+ # ----------------------------------
+ #
+ # * you can also aggregate the "sql duration" with a slightly different configuration:
+ # [source,ruby]
+ # ----------------------------------
+ # filter {
+ #   grok {
+ #     match => [ "message", "%{LOGLEVEL:loglevel} - %{NOTSPACE:taskid} - %{NOTSPACE:logger} - %{WORD:label}( - %{INT:duration:int})?" ]
+ #   }
+ #
+ #   if [logger] == "SQL" {
+ #     aggregate {
+ #       task_id => "%{taskid}"
+ #       code => "map['sql_duration'] ||= 0 ; map['sql_duration'] += event['duration']"
+ #     }
+ #   }
+ #
+ #   if [logger] == "TASK_END" {
+ #     aggregate {
+ #       task_id => "%{taskid}"
+ #       code => "event['sql_duration'] = map['sql_duration']"
+ #       end_of_task => true
+ #       timeout => 120
+ #     }
+ #   }
+ # }
+ # ----------------------------------
+ #
+ # * the final event is exactly the same as in example #1
+ # * the key point is the `||=` Ruby operator, which initializes the 'sql_duration' map entry to 0 only if the entry is not already initialized
+ #
+ #
+ # ==== How it works
+ # * the filter needs a "task_id" to correlate the events (log lines) of the same task
+ # * at the beginning of the task, the filter creates a map attached to the task_id
+ # * for each event, you can execute code using 'event' and 'map' (for instance, copy an event field to the map)
+ # * on the final event, you can execute a last piece of code (for instance, add map data to the final event)
+ # * after the final event, the map attached to the task is deleted
+ # * in one filter configuration, it is recommended to define a timeout option to protect the feature against unterminated tasks; it tells the filter to delete expired maps
+ # * if no timeout is defined, by default, all maps older than 1800 seconds are automatically deleted
+ # * finally, if `code` execution raises an exception, the error is logged and the event is tagged '_aggregateexception'
+ #
+ #
+ # ==== Use Cases
+ # * extract some useful metrics from task logs and push them into the final task log event (as in examples #1 and #2)
+ # * extract error information from any task log line and push it into the final task event (to get a final document with all error information, if any)
+ # * extract all back-end calls as a list and push this list into the final task event (to get a task profile)
+ # * extract all http headers logged across several lines and push this list into the final task event (complete http request info)
+ # * for every back-end call, collect the call details available across several lines, analyse them and finally tag the final back-end call log line (error, timeout, business-warning, ...)
+ # * Finally, the task id can be any correlation id matching your needs: it can be a session id, a file path, ...
+ #
+ #
+ class LogStash::Filters::Aggregate < LogStash::Filters::Base
+
+   config_name "aggregate"
+
+   # The expression defining the task ID used to correlate logs.
+   #
+   # This value must uniquely identify the task in the system.
+   #
+   # Example value: "%{application}%{my_task_id}"
+   config :task_id, :validate => :string, :required => true
+
+   # The code to execute to update the map, using the current event.
+   #
+   # Or, conversely, the code to execute to update the event, using the current map.
+   #
+   # You will have a 'map' variable and an 'event' variable available (that is, the event itself).
+   #
+   # Example value: `"map['sql_duration'] += event['duration']"`
+   config :code, :validate => :string, :required => true
+
+   # Tell the filter what to do with the aggregate map.
+   #
+   # `create`: creates the map, and executes the code only if the map wasn't created before
+   #
+   # `update`: doesn't create the map, and executes the code only if the map was created before
+   #
+   # `create_or_update`: creates the map if it wasn't created before, and executes the code in all cases
+   config :map_action, :validate => :string, :default => "create_or_update"
+
+   # Tell the filter that the task has ended and, therefore, that the map should be deleted after code execution.
+   config :end_of_task, :validate => :boolean, :default => false
+
+   # The number of seconds after which a task "end event" can be considered lost.
+   #
+   # When a timeout occurs for a task, the task "map" is evicted.
+   #
+   # The default value (`0`) means no timeout, so no auto eviction.
+   config :timeout, :validate => :number, :required => false, :default => 0
+
+   # The path to the file where aggregate maps are stored when Logstash stops
+   # and from which they are loaded when Logstash starts.
+   #
+   # If not defined, aggregate maps will not be stored when Logstash stops and will be lost.
+   # Should be defined on only one aggregate filter (as aggregate maps are global).
+   #
+   # Example value: `"/path/to/.aggregate_maps"`
+   config :aggregate_maps_path, :validate => :string, :required => false
+
+
+   # Default timeout (in seconds) when not defined in the plugin configuration
+   DEFAULT_TIMEOUT = 1800
+
+   # This is the state of the filter.
+   # For each entry, the key is the "task_id" and the value is a map freely updatable by the 'code' config
+   @@aggregate_maps = {}
+
+   # Mutex used to synchronize access to 'aggregate_maps'
+   @@mutex = Mutex.new
+
+   # Aggregate instance which will evict all zombie Aggregate elements (older than timeout)
+   @@eviction_instance = nil
+
+   # last time eviction was launched
+   @@last_eviction_timestamp = nil
+
+   # Initialize plugin
+   public
+   def register
+     # process the lambda expression to call in each filter call
+     eval("@codeblock = lambda { |event, map| #{@code} }", binding, "(aggregate filter code)")
+
+     @@mutex.synchronize do
+       # define eviction_instance
+       if (@timeout > 0 && (@@eviction_instance.nil? || @timeout < @@eviction_instance.timeout))
+         @@eviction_instance = self
+         @logger.info("Aggregate, timeout: #{@timeout} seconds")
+       end
+
+       # load aggregate maps from the file (if the option is defined)
+       if (!@aggregate_maps_path.nil? && File.exist?(@aggregate_maps_path))
+         File.open(@aggregate_maps_path, "r") { |from_file| @@aggregate_maps = Marshal.load(from_file) }
+         File.delete(@aggregate_maps_path)
+         @logger.info("Aggregate, load aggregate maps from : #{@aggregate_maps_path}")
+       end
+     end
+   end
+
+   # Called when Logstash stops
+   public
+   def close
+     @@mutex.synchronize do
+       # store aggregate maps to the file (if the option is defined)
+       if (!@aggregate_maps_path.nil? && !@@aggregate_maps.empty?)
+         File.open(@aggregate_maps_path, "w"){ |to_file| Marshal.dump(@@aggregate_maps, to_file) }
+         @@aggregate_maps.clear()
+         @logger.info("Aggregate, store aggregate maps to : #{@aggregate_maps_path}")
+       end
+     end
+   end
+
+   # This method is invoked each time an event matches the filter
+   public
+   def filter(event)
+
+     # define the task id
+     task_id = event.sprintf(@task_id)
+     return if task_id.nil? || task_id == @task_id
+
+     noError = false
+
+     # protect aggregate_maps against concurrent access, using a mutex
+     @@mutex.synchronize do
+
+       # retrieve the current aggregate map
+       aggregate_maps_element = @@aggregate_maps[task_id]
+       if (aggregate_maps_element.nil?)
+         return if @map_action == "update"
+         aggregate_maps_element = LogStash::Filters::Aggregate::Element.new(Time.now);
+         @@aggregate_maps[task_id] = aggregate_maps_element
+       else
+         return if @map_action == "create"
+       end
+       map = aggregate_maps_element.map
+
+       # execute the code to read/update the map and the event
+       begin
+         @codeblock.call(event, map)
+         noError = true
+       rescue => exception
+         @logger.error("Aggregate exception occurred. Error: #{exception} ; Code: #{@code} ; Map: #{map} ; EventData: #{event.instance_variable_get('@data')}")
+         event.tag("_aggregateexception")
+       end
+
+       # delete the map if the task has ended
+       @@aggregate_maps.delete(task_id) if @end_of_task
+     end
+
+     # match the filter, only if no error occurred
+     filter_matched(event) if noError
+   end
+
+   # Necessary to tell Logstash to periodically call the 'flush' method
+   def periodic_flush
+     true
+   end
+
+   # This method is invoked by Logstash every 5 seconds.
+   def flush(options = {})
+     # Protection against no timeout defined in the Logstash conf: define a default eviction instance with timeout = DEFAULT_TIMEOUT seconds
+     if (@@eviction_instance.nil?)
+       @@eviction_instance = self
+       @timeout = DEFAULT_TIMEOUT
+     end
+
+     # Launch eviction only once every (@timeout / 2) seconds
+     if (@@eviction_instance == self && (@@last_eviction_timestamp.nil? || Time.now > @@last_eviction_timestamp + @timeout / 2))
+       remove_expired_elements()
+       @@last_eviction_timestamp = Time.now
+     end
+
+     return nil
+   end
+
+
+   # Remove expired Aggregate elements from "aggregate_maps" if they are older than the timeout
+   def remove_expired_elements()
+     min_timestamp = Time.now - @timeout
+     @@mutex.synchronize do
+       @@aggregate_maps.delete_if { |key, element| element.creation_timestamp < min_timestamp }
+     end
+   end
+
+ end # class LogStash::Filters::Aggregate
+
+ # Element of "aggregate_maps"
+ class LogStash::Filters::Aggregate::Element
+
+   attr_accessor :creation_timestamp, :map
+
+   def initialize(creation_timestamp)
+     @creation_timestamp = creation_timestamp
+     @map = {}
+   end
+ end
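
The headline change in 2.1.0 is the new `aggregate_maps_path` option: on shutdown, `close` serializes `@@aggregate_maps` to the configured file with `Marshal`, and on the next startup `register` loads the file back and then deletes it. Below is a minimal standalone sketch of that round-trip; the temp path and the plain-hash stand-in for the filter's `Element` objects are illustrative assumptions, not the plugin's API.

    require "tmpdir"

    # Hypothetical path; the real one comes from the aggregate_maps_path option.
    path = File.join(Dir.tmpdir, ".aggregate_maps")

    # Stand-in for @@aggregate_maps; the real values are Element instances,
    # which Marshal serializes just like plain hashes.
    aggregate_maps = { "12345" => { "sql_duration" => 46 } }

    # What close() does when Logstash stops: dump the maps to the file.
    File.open(path, "w") { |to_file| Marshal.dump(aggregate_maps, to_file) }

    # What register() does on the next start: load the maps back, then delete
    # the file so a later restart cannot reload stale state.
    restored = File.open(path, "r") { |from_file| Marshal.load(from_file) }
    File.delete(path)

    puts restored.inspect  # => {"12345"=>{"sql_duration"=>46}}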
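
In a pipeline configuration, the new option attaches to an ordinary aggregate block; per the inline docs it should be set on only one aggregate filter, since the maps are global. A sketch reusing the fields from example #2 (the path is the documentation's placeholder, not a real location):

    filter {
      aggregate {
        task_id => "%{taskid}"
        code => "map['sql_duration'] ||= 0 ; map['sql_duration'] += event['duration']"
        aggregate_maps_path => "/path/to/.aggregate_maps"
      }
    }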
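
Unrelated to the version bump but worth making concrete: the `||=` idiom that example #2 hinges on can be verified in plain Ruby, using the same durations (12 and 34) as the sample logs:

    map = {}

    # First SQL line: the key is absent, so ||= initializes it to 0.
    map['sql_duration'] ||= 0
    map['sql_duration'] += 12

    # Second SQL line: the key already holds a non-nil value, so ||= is a no-op.
    map['sql_duration'] ||= 0
    map['sql_duration'] += 34

    puts map['sql_duration']  # => 46, the value copied into the final event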
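
Finally, the flush/eviction timing (unchanged in this release) is easy to misread: Logstash calls `flush` roughly every 5 seconds, but the elected eviction instance only sweeps once per `timeout / 2` seconds, and each sweep drops every map created more than `timeout` seconds ago. A condensed restatement, with local variables standing in for the filter's class variables:

    timeout = 120                 # seconds, as in the TASK_END examples
    last_eviction_timestamp = nil
    aggregate_maps = {}           # task_id => { :creation_timestamp => Time }

    now = Time.now
    if last_eviction_timestamp.nil? || now > last_eviction_timestamp + timeout / 2
      min_timestamp = now - timeout
      # drop every task map created more than `timeout` seconds ago
      aggregate_maps.delete_if { |_task_id, element| element[:creation_timestamp] < min_timestamp }
      last_eviction_timestamp = now
    end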