fluent-plugin-detect-ft-memb-exceptions 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
# Gem specification for the FT-membership variant of the Fluentd
# exception-detection output plugin.
Gem::Specification.new do |spec|
  # Identity and ownership.
  spec.name     = 'fluent-plugin-detect-ft-memb-exceptions'
  spec.version  = '0.0.3'
  spec.license  = 'Apache-2.0'
  spec.authors  = ['Naomi stern']
  spec.email    = ['sally.dixon@ft.com']
  spec.homepage = 'https://github.com/Financial-Times/fluent-plugin-detect-exceptions'

  # Human-readable texts.
  spec.summary = 'fluentd output plugin for combining stack traces as multi-line JSON logs'
  spec.description = <<-eos
Fluentd output plugin which detects ft membership specific exception stack traces in a stream of
JSON log messages and combines all single-line messages that belong to the
same stack trace into one multi-line message.
This is an adaption of an official Google Ruby gem.
  eos

  spec.required_ruby_version = Gem::Requirement.new('>= 2.0')

  # Package every regular file below the current directory; anything whose
  # path starts with "test" is additionally registered as a test file.
  spec.files = Dir['**/*'].select { |path| File.file?(path) }
  spec.test_files = spec.files.grep(/^(test)/)
  spec.require_paths = ['lib']

  # Runtime requirement.
  spec.add_runtime_dependency 'fluentd', '~> 0.10'

  # Tooling needed only for development and testing.
  spec.add_development_dependency 'rake', '~> 10.3'
  spec.add_development_dependency 'rubocop', '= 0.42.0'
  spec.add_development_dependency 'test-unit', '~> 3.0'
end
@@ -0,0 +1,328 @@
1
+ # Copyright 2016 Google Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ module Fluent
16
# One state-machine transition: while in +from_state+, a line matching
# +pattern+ moves the machine to +to_state+. Registered as Struct::Rule.
Struct.new('Rule', :from_state, :pattern, :to_state)

# Configuration of the state machine that detects exceptions.
module ExceptionDetectorConfig
  # Rule for a state transition: if pattern matches go to the given state.
  #
  # Two targets are equal (and hash identically) when both their pattern and
  # their destination state are equal, which lets callers de-duplicate them.
  class RuleTarget
    attr_accessor :pattern, :to_state

    # p: Regexp tested against a log line.
    # s: Symbol naming the state entered when the pattern matches.
    def initialize(p, s)
      @pattern = p
      @to_state = s
    end

    def ==(other)
      other.class == self.class && other.state == state
    end

    alias eql? ==

    def hash
      state.hash
    end

    # The values that define this target's identity.
    def state
      [@pattern, @to_state]
    end
  end

  # Builds a Struct::Rule; used below to keep the rule tables compact.
  def self.rule(from_state, pattern, to_state)
    Struct::Rule.new(from_state, pattern, to_state)
  end

  # Returns the language keys accepted by RULES_BY_LANG.
  def self.supported
    RULES_BY_LANG.keys
  end

  JAVA_RULES = [
    rule(:start_state, /(?:Exception|Error|Throwable|V8 errors stack trace)[:\r\n]/, :java),
    rule(:start_state, /(?:ERROR|WARN)(\s+\[)/, :java_stack_begin),
    rule(:java_stack_begin, /(?:Exception|Error|Throwable|V8 errors stack trace)[:\r\n]/, :java),
    rule(:java, /^[\t ]+(?:eval )?at /, :java),
    rule(:java, /^(?:eval )?! at/, :java),
    rule(:java, /^[\t ]*(?:Caused by|Suppressed):/, :java),
    # The leading ellipsis is matched literally; unescaped dots would accept
    # any three characters (e.g. "abc 12 more") as a trace continuation.
    rule(:java, /^[\t ]*\.\.\. \d+\ (more|common frames omitted)/, :java)
  ].freeze

  KAFKA_RULES = [
    rule(:start_state, /(?:ERROR|WARN)(\s+\[).*kafka/, :kafka_failure_info),
    # ISO-8601 UTC timestamp with milliseconds; the fraction separator is a
    # literal dot.
    rule(:start_state, /^[0-9]{4}-[0-1][0-9]-[0-3][0-9]T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3}Z (?:ERROR|WARN)/, :kafka_failure_info),
    rule(:kafka_failure_info, /NoKafkaConnectionError/, :kafka_failure_info),
    rule(:kafka_failure_info, /^(\s)*server: 'kafka/, :kafka_failure_info),
    rule(:kafka_failure_info, /^(\s)*message:/, :kafka_failure_info),
    rule(:kafka_failure_info, /^Message-Timestamp: /, :kafka_failure_info),
    rule(:kafka_failure_info, /^Message-Type: /, :kafka_failure_info),
    rule(:kafka_failure_info, /^Origin-System-Id: /, :kafka_failure_info),
    rule(:kafka_failure_info, /^Origin-Host-Location: /, :kafka_failure_info),
    rule(:kafka_failure_info, /^Content-Type: /, :kafka_failure_info),
    rule(:kafka_failure_info, /^Origin-Host: /, :kafka_failure_info),
    # NOTE(review): this pattern reads like a shell glob ("{*}*partitionKey*topic")
    # rather than a regexp. It is kept semantically as it was (only the
    # redundant nested quantifier "}+*" is written as the equivalent "}*");
    # confirm the intended match with the rule's author.
    rule(:kafka_failure_info, /^\{*.+\}*.partitionKey*.+topic/, :kafka_failure_info),
    rule(:kafka_failure_info, /^Message-Id: [a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}/, :java),
    rule(:kafka_failure_info, /^\[at /, :java),
    rule(:kafka_failure_info, /(?:Exception|Error|Throwable|V8 errors stack trace)[:\r\n]/, :java),
    rule(:java, /^(?:eval )?! at/, :java)
  ].freeze

  PYTHON_RULES = [
    rule(:start_state, /Traceback \(most recent call last\)/, :python),
    rule(:python, /^[\t ]+File /, :python_code),
    rule(:python_code, /[^\t ]/, :python),
    rule(:python, /^(?:[^\s.():]+\.)*[^\s.():]+:/, :start_state)
  ].freeze

  ELIXIR_RULES = [
    rule(:start_state, /^\d{2}:\d{2}:\d{2}\.\d{3} \[error\] /, :elixir_failure_info),
    rule(:elixir_failure_info, /^[^\d]/, :elixir_failure_info)
  ].freeze

  PHP_RULES = [
    rule(:start_state, /
      (?:PHP\ (?:Notice|Parse\ error|Fatal\ error|Warning):)|
      (?:exception\ '[^']+'\ with\ message\ ')/x, :php_stack_begin),
    rule(:php_stack_begin, /^Stack trace:/, :php_stack_frames),
    rule(:php_stack_frames, /^#\d/, :php_stack_frames),
    rule(:php_stack_frames, /^\s+thrown in /, :start_state)
  ].freeze

  GO_RULES = [
    rule(:start_state, /panic: /, :go_before_goroutine),
    rule(:go_before_goroutine, /^$/, :go_goroutine),
    rule(:go_goroutine, /^goroutine \d+ \[[^\]]+\]:$/, :go_frame_1),
    rule(:go_frame_1, /(?:[^\s.():]+\.)*[^\s.():]\(/, :go_frame_2),
    rule(:go_frame_1, /^$/, :go_before_goroutine),
    rule(:go_frame_2, /^\s/, :go_frame_1)
  ].freeze

  RUBY_RULES = [
    rule(:start_state, /Error \(.*\):$/, :ruby),
    rule(:ruby, /^[\t ]+.*?\.rb:\d+:in `/, :ruby)
  ].freeze

  # Every rule set combined; the Kafka rules come first, so they are tried
  # before the generic Java rules that share a start state.
  ALL_RULES = (
    KAFKA_RULES + JAVA_RULES + PYTHON_RULES + PHP_RULES + GO_RULES + RUBY_RULES + ELIXIR_RULES).freeze

  # Maps each supported language key to the rules applied for it.
  RULES_BY_LANG = {
    java: JAVA_RULES + KAFKA_RULES,
    javascript: JAVA_RULES + KAFKA_RULES,
    js: JAVA_RULES + KAFKA_RULES,
    csharp: JAVA_RULES,
    py: PYTHON_RULES,
    python: PYTHON_RULES,
    php: PHP_RULES,
    go: GO_RULES,
    rb: RUBY_RULES,
    ruby: RUBY_RULES,
    elixir: ELIXIR_RULES,
    all: ALL_RULES
  }.freeze

  # Record keys probed, in this order, when no message field is configured.
  DEFAULT_FIELDS = %w(message log).freeze
end
136
+
137
+ # State machine that consumes individual log lines and detects
138
+ # multi-line stack traces.
139
+ class ExceptionDetector
140
+ def initialize(*languages)
141
+ @state = :start_state
142
+ @rules = Hash.new { |h, k| h[k] = [] }
143
+
144
+ languages = [:all] if languages.empty?
145
+
146
+ languages.each do |lang|
147
+ rule_config =
148
+ ExceptionDetectorConfig::RULES_BY_LANG.fetch(lang.downcase) do |_k|
149
+ raise ArgumentError, "Unknown language: #{lang}"
150
+ end
151
+
152
+ rule_config.each do |r|
153
+ target = ExceptionDetectorConfig::RuleTarget.new(r[:pattern],
154
+ r[:to_state])
155
+ @rules[r[:from_state]] << target
156
+ end
157
+ end
158
+
159
+ @rules.each_value(&:uniq!)
160
+ end
161
+
162
+ # Updates the state machine and returns the trace detection status:
163
+ # - no_trace: 'line' does not belong to an exception trace,
164
+ # - start_trace: 'line' starts a detected exception trace,
165
+ # - inside: 'line' is part of a detected exception trace,
166
+ # - end: the detected exception trace ends after 'line'.
167
+ def update(line)
168
+ trace_seen_before = transition(line)
169
+ # If the state machine fell back to the start state because there is no
170
+ # defined transition for 'line', trigger another state transition because
171
+ # 'line' may contain the beginning of another exception.
172
+ transition(line) unless trace_seen_before
173
+ new_state = @state
174
+ trace_seen_after = new_state != :start_state
175
+
176
+ case [trace_seen_before, trace_seen_after]
177
+ when [true, true]
178
+ :inside_trace
179
+ when [true, false]
180
+ :end_trace
181
+ when [false, true]
182
+ :start_trace
183
+ else
184
+ :no_trace
185
+ end
186
+ end
187
+
188
+ def reset
189
+ @state = :start_state
190
+ end
191
+
192
+ private
193
+
194
+ # Executes a transition of the state machine for the given line.
195
+ # Returns false if the line does not match any transition rule and the
196
+ # state machine was reset to the initial state.
197
+ def transition(line)
198
+ @rules[@state].each do |r|
199
+ next unless line =~ r.pattern
200
+ @state = r.to_state
201
+ return true
202
+ end
203
+ @state = :start_state
204
+ false
205
+ end
206
+ end
207
+
208
# Buffers and groups log records if they contain exception stack traces.
class TraceAccumulator
  # Time at which the current buffer received its first record (initially
  # the time the accumulator was created).
  attr_reader :buffer_start_time

  # If message_field is nil, the instance is set up to accumulate
  # records that are plain strings (i.e. the whole record is concatenated).
  # Otherwise, the instance accepts records that are dictionaries (usually
  # originating from structured JSON logs) and accumulates just the
  # content of the given message field.
  # message_field may contain the empty string. In this case, the
  # TraceAccumulator 'learns' the field name from the first record by checking
  # for some pre-defined common field names of text logs.
  # The named parameters max_lines and max_bytes limit the maximum amount
  # of data to be buffered. The default value 0 indicates 'no limit'.
  # The given block is called with (timestamp, record) for every record
  # that leaves the accumulator.
  def initialize(message_field, languages, max_lines: 0, max_bytes: 0,
                 &emit_callback)
    @exception_detector = Fluent::ExceptionDetector.new(*languages)
    @max_lines = max_lines
    @max_bytes = max_bytes
    @message_field = message_field
    @messages = []
    @buffer_start_time = Time.now
    @buffer_size = 0
    @first_record = nil
    @first_timestamp = nil
    @emit = emit_callback
  end

  # Feeds one record into the accumulator. Depending on what the exception
  # detector reports for the record's message, the record is buffered,
  # emitted unchanged, or causes the buffered trace to be emitted.
  def push(time_sec, record)
    message = extract_message(record)
    if message.nil?
      # Nothing to inspect: this record cannot continue a trace.
      @exception_detector.reset
      detection_status = :no_trace
    else
      # Flush first if appending this message would exceed the byte limit.
      force_flush if @max_bytes > 0 &&
                     @buffer_size + message.bytesize > @max_bytes
      detection_status = @exception_detector.update(message)
    end
    update_buffer(detection_status, time_sec, record, message)

    force_flush if @max_lines > 0 && @messages.length == @max_lines
  end

  # Emits the buffered messages (if any) as one record carrying the
  # timestamp of the first buffered record, then clears the buffer.
  def flush
    return if @messages.empty?

    output_record =
      if @message_field.nil?
        # Plain-string mode: the record is the concatenated text itself.
        @messages.join
      elsif @messages.length == 1
        # A single structured record is passed through untouched.
        @first_record
      else
        # Reuse the first record and replace its message with the whole trace.
        @first_record[@message_field] = @messages.join
        @first_record
      end
    @emit.call(@first_timestamp, output_record)

    @messages = []
    @first_record = nil
    @first_timestamp = nil
    @buffer_size = 0
  end

  # Flushes the buffer and returns the exception detector to its start
  # state, so the next record cannot continue the flushed trace.
  def force_flush
    flush
    @exception_detector.reset
  end

  private

  # Returns the text to run exception detection on, or nil when the record
  # has no usable (String) message. While message_field is the empty string,
  # the first default field name present in the record is adopted.
  def extract_message(record)
    if !@message_field.nil? && @message_field.empty?
      learned = ExceptionDetectorConfig::DEFAULT_FIELDS.find do |f|
        record.key?(f)
      end
      @message_field = learned unless learned.nil?
    end
    message = @message_field.nil? ? record : record[@message_field]
    # Non-String values cannot be matched or concatenated; such records are
    # forwarded unchanged like records without a message.
    message.is_a?(String) ? message : nil
  end

  # Applies the detector's verdict for one record to the buffer.
  def update_buffer(detection_status, time_sec, record, message)
    case detection_status
    when :inside_trace
      add(time_sec, record, message)
    when :start_trace
      flush
      add(time_sec, record, message)
    when :end_trace
      if @messages.empty?
        @emit.call(time_sec, record)
      else
        add(time_sec, record, message)
        flush
      end
    else # :no_trace
      # Emit whatever was buffered, then pass this record through as is.
      # It is emitted directly so that it is never lost, even when it has
      # no message to buffer.
      flush
      @emit.call(time_sec, record)
    end
  end

  # Appends a message to the buffer, remembering the first record and its
  # timestamp when the buffer was empty.
  def add(time_sec, record, message)
    if @messages.empty?
      @first_record = record unless @message_field.nil?
      @first_timestamp = time_sec
      @buffer_start_time = Time.now
    end
    return if message.nil?

    @messages << message
    @buffer_size += message.bytesize
  end
end
328
+ end
@@ -0,0 +1,136 @@
1
+ #
2
+ # Copyright 2016 Google Inc. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ require 'fluent/plugin/exception_detector'
17
+ require 'fluent/output'
18
+
19
+ module Fluent
20
# This output plugin consumes a log stream of JSON objects which contain
# single-line log messages. If a consecutive sequence of log messages forms
# an exception stack trace, they are forwarded as a single, combined JSON
# object. Otherwise, the input log data is forwarded as is.
class DetectExceptionsOutput < Output
  desc 'The field which contains the raw message text in the input JSON data.'
  config_param :message, :string, default: ''
  desc 'The prefix to be removed from the input tag when outputting a record.'
  config_param :remove_tag_prefix, :string, default: ''
  desc 'The interval of flushing the buffer for multiline format.'
  config_param :multiline_flush_interval, :time, default: nil
  desc 'Programming languages for which to detect exceptions. Default: all.'
  config_param :languages, :array, value_type: :string, default: []
  desc 'Maximum number of lines to flush (0 means no limit). Default: 1000.'
  config_param :max_lines, :integer, default: 1000
  desc 'Maximum number of bytes to flush (0 means no limit). Default: 0.'
  config_param :max_bytes, :integer, default: 0
  desc 'Separate log streams by this field in the input JSON data.'
  config_param :stream, :string, default: ''

  Fluent::Plugin.register_output('detect_exceptions', self)

  # Reads the configuration and prepares per-stream state.
  def configure(conf)
    super
    if multiline_flush_interval
      # Poll at a tenth of the flush interval, but at most once per second.
      @check_flush_interval = [multiline_flush_interval * 0.1, 1].max
    end

    @languages = languages.map(&:to_sym)

    # Maps log stream tags to a corresponding TraceAccumulator.
    @accumulators = {}
  end

  # Starts the background flush thread when a flush interval is configured.
  def start
    super
    if multiline_flush_interval
      @flush_buffer_mutex = Mutex.new
      @stop_check = false
      @thread = Thread.new(&method(:check_flush_loop))
    end
  end

  def before_shutdown
    flush_buffers
    super if defined?(super)
  end

  def shutdown
    # Before shutdown is not available in older fluentd versions.
    # Hence, we make sure that we flush the buffers here as well.
    flush_buffers
    # The thread only exists if start ran with a flush interval configured.
    @thread.join if @thread
    super
  end

  # Feeds every record of the event stream into its accumulator.
  def emit(tag, es, chain)
    es.each do |time_sec, record|
      process_record(tag, time_sec, record)
    end
    chain.next
  end

  private

  # Routes one record to the accumulator of its log stream, creating the
  # accumulator on first use. A stream is identified by the tag and, when
  # the 'stream' parameter is set, by the value of that record field.
  def process_record(tag, time_sec, record)
    synchronize do
      log_id = [tag]
      log_id.push(record.fetch(@stream, '')) unless @stream.empty?
      unless @accumulators.key?(log_id)
        out_tag = tag.sub(/^#{Regexp.escape(@remove_tag_prefix)}\./, '')
        @accumulators[log_id] =
          Fluent::TraceAccumulator.new(@message, @languages,
                                       max_lines: @max_lines,
                                       max_bytes: @max_bytes) do |t, r|
            router.emit(out_tag, t, r)
          end
      end

      @accumulators[log_id].push(time_sec, record)
    end
  end

  # Flushes every accumulator and tells the flush thread to stop.
  def flush_buffers
    synchronize do
      @stop_check = true
      @accumulators.each_value(&:force_flush)
    end
  end

  # Body of the background thread: periodically force-flushes accumulators
  # whose buffer is older than multiline_flush_interval, until asked to stop.
  def check_flush_loop
    @flush_buffer_mutex.synchronize do
      loop do
        # Mutex#sleep releases the lock while waiting, so records can still
        # be processed between checks.
        @flush_buffer_mutex.sleep(@check_flush_interval)
        now = Time.now
        if @stop_check
          log.debug 'Reached flush loop so stopping'
          break
        end
        @accumulators.each_value do |acc|
          acc.force_flush if now - acc.buffer_start_time >
                             @multiline_flush_interval
        end
      end
    end
  rescue StandardError => e
    # Use the rescued exception directly: $ERROR_INFO is only defined after
    # "require 'English'", which this file does not do.
    log.error 'error in check_flush_loop', error: e.to_s
    log.error_backtrace
  end

  # Runs the block under the flush mutex when one exists (i.e. when the
  # background flush thread may be running), otherwise runs it directly.
  def synchronize(&block)
    if @flush_buffer_mutex
      @flush_buffer_mutex.synchronize(&block)
    else
      yield
    end
  end
end
136
+ end