fluent-plugin-cloudwatch-logs-yajl 0.4.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,377 @@
1
+ require 'fluent/output'
2
+ require 'thread'
3
+
4
module Fluent
  require 'fluent/mixin/config_placeholders'

  # Buffered output plugin that ships records to AWS CloudWatch Logs.
  #
  # Records in each buffer chunk are grouped per (log group, log stream) pair —
  # taken from static config, from the tag, or from record keys — then batched
  # under the PutLogEvents API limits and uploaded by a pool of worker threads.
  class CloudwatchLogsOutput < BufferedOutput
    Plugin.register_output('cloudwatch_logs', self)

    include Fluent::SetTimeKeyMixin
    include Fluent::Mixin::ConfigPlaceholders

    config_param :aws_key_id, :string, :default => nil, :secret => true
    config_param :aws_sec_key, :string, :default => nil, :secret => true
    config_param :region, :string, :default => nil
    config_param :log_group_name, :string, :default => nil
    config_param :log_stream_name, :string, :default => nil
    config_param :auto_create_stream, :bool, default: false
    config_param :message_keys, :string, :default => nil
    config_param :max_message_length, :integer, :default => nil
    config_param :max_events_per_batch, :integer, :default => 10000
    config_param :use_tag_as_group, :bool, :default => false # TODO: Rename to use_tag_as_group_name ?
    config_param :use_tag_as_stream, :bool, :default => false # TODO: Rename to use_tag_as_stream_name ?
    config_param :log_group_name_key, :string, :default => nil
    config_param :log_stream_name_key, :string, :default => nil
    config_param :remove_log_group_name_key, :bool, :default => false
    config_param :remove_log_stream_name_key, :bool, :default => false
    config_param :http_proxy, :string, default: nil
    config_param :put_log_events_retry_wait, :time, default: 1.0
    config_param :put_log_events_retry_limit, :integer, default: 17
    config_param :put_log_events_disable_retry_limit, :bool, default: false
    config_param :concurrency, :integer, default: 1

    # PutLogEvents API limits:
    # http://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_PutLogEvents.html
    MAX_EVENTS_SIZE = 1_048_576  # max total batch size in bytes
    MAX_EVENT_SIZE = 256 * 1024  # max size of a single event message
    EVENT_HEADER_SIZE = 26       # per-event overhead counted by the API

    # Older Fluentd versions do not provide a plugin-local logger.
    unless method_defined?(:log)
      define_method(:log) { $log }
    end

    def initialize
      super

      # Lazy-load the SDK so merely installing the plugin does not require it.
      require 'aws-sdk-cloudwatchlogs'
    end

    # Placeholder styles supported by Fluent::Mixin::ConfigPlaceholders.
    def placeholders
      [:percent]
    end

    # Validates that exactly one source is configured for the group name and
    # exactly one for the stream name.
    # @raise [ConfigError] when zero or more than one source is set
    def configure(conf)
      super

      unless [conf['log_group_name'], conf['use_tag_as_group'], conf['log_group_name_key']].compact.size == 1
        raise ConfigError, "Set only one of log_group_name, use_tag_as_group and log_group_name_key"
      end

      unless [conf['log_stream_name'], conf['use_tag_as_stream'], conf['log_stream_name_key']].compact.size == 1
        raise ConfigError, "Set only one of log_stream_name, use_tag_as_stream and log_stream_name_key"
      end
    end

    # Builds the CloudWatch Logs client and resets the per-stream sequence
    # token cache. `@logs ||=` lets tests inject a stubbed client beforehand.
    def start
      super

      options = {}
      options[:credentials] = Aws::Credentials.new(@aws_key_id, @aws_sec_key) if @aws_key_id && @aws_sec_key
      options[:region] = @region if @region
      options[:http_proxy] = @http_proxy if @http_proxy
      @logs ||= Aws::CloudWatchLogs::Client.new(options)
      # {group_name => {stream_name => sequence_token}}
      @sequence_tokens = {}
      @store_next_sequence_token_mutex = Mutex.new
    end

    # Serializes each event as a msgpack-encoded [tag, time, record] triple.
    def format(tag, time, record)
      [tag, time, record].to_msgpack
    end

    # Groups the chunk's events by (group, stream), sorts each batch by
    # timestamp as PutLogEvents requires, and uploads the batches through
    # @concurrency worker threads fed by a queue.
    def write(chunk)
      queue = Thread::Queue.new

      chunk.enum_for(:msgpack_each).select {|tag, time, record|
        if record.nil?
          log.warn "record is nil (tag=#{tag})"
          false
        else
          true
        end
      }.group_by {|tag, time, record|
        group = case
                when @use_tag_as_group
                  tag
                when @log_group_name_key
                  if @remove_log_group_name_key
                    # Hash#delete returns the removed value, so the key still
                    # names the group while being stripped from the record.
                    record.delete(@log_group_name_key)
                  else
                    record[@log_group_name_key]
                  end
                else
                  @log_group_name
                end

        stream = case
                 when @use_tag_as_stream
                   tag
                 when @log_stream_name_key
                   if @remove_log_stream_name_key
                     record.delete(@log_stream_name_key)
                   else
                     record[@log_stream_name_key]
                   end
                 else
                   @log_stream_name
                 end

        [group, stream]
      }.each {|group_stream, rs|
        group_name, stream_name = group_stream

        if stream_name.nil?
          log.warn "stream_name is nil (group_name=#{group_name})"
          next
        end

        unless log_group_exists?(group_name)
          if @auto_create_stream
            create_log_group(group_name)
          else
            log.warn "Log group '#{group_name}' does not exist"
            next
          end
        end

        unless log_stream_exists?(group_name, stream_name)
          if @auto_create_stream
            create_log_stream(group_name, stream_name)
          else
            log.warn "Log stream '#{stream_name}' does not exist"
            next
          end
        end

        events = []
        rs.each do |t, time, record|
          # CloudWatch Logs timestamps are in milliseconds.
          time_ms = time * 1000

          scrub_record!(record)
          if @message_keys
            message = @message_keys.split(',').map {|k| record[k].to_s }.join(' ')
          else
            message = Yajl.dump(record)
          end

          if @max_message_length
            message = message.slice(0, @max_message_length)
          end

          events << {timestamp: time_ms, message: message}
        end
        # The log events in the batch must be in chronological ordered by their timestamp.
        # http://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_PutLogEvents.html
        events = events.sort_by {|e| e[:timestamp] }

        queue << [group_name, stream_name, events]
      }

      # One nil sentinel per worker terminates its shift loop.
      @concurrency.times do
        queue << nil
      end
      threads = @concurrency.times.map do |i|
        Thread.start do
          while job = queue.shift
            group_name, stream_name, events = job
            put_events_by_chunk(group_name, stream_name, events)
          end
        end
      end
      threads.each(&:join)
    end

    private
    # Recursively replaces invalid/undefined byte sequences in all strings of
    # the record, in place, so JSON encoding cannot fail on bad UTF-8.
    def scrub_record!(record)
      case record
      when Hash
        record.each_value {|v| scrub_record!(v) }
      when Array
        record.each {|v| scrub_record!(v) }
      when String
        record.scrub!
      end
    end

    # Drops the cached sequence token so the next put re-resolves it.
    def delete_sequence_token(group_name, stream_name)
      @sequence_tokens[group_name].delete(stream_name)
    end

    def next_sequence_token(group_name, stream_name)
      @sequence_tokens[group_name][stream_name]
    end

    # Mutex-guarded because worker threads for different streams of the same
    # group update the shared nested hash concurrently.
    def store_next_sequence_token(group_name, stream_name, token)
      @store_next_sequence_token_mutex.synchronize do
        @sequence_tokens[group_name][stream_name] = token
      end
    end

    # Splits +events+ (already sorted by timestamp) into API-sized batches and
    # uploads each one. A batch is flushed early when adding the next event
    # would exceed the byte limit, the 24h timestamp span limit, or
    # max_events_per_batch.
    def put_events_by_chunk(group_name, stream_name, events)
      chunk = []

      # The maximum batch size is 1,048,576 bytes, and this size is calculated as the sum of all event messages in UTF-8, plus 26 bytes for each log event.
      # http://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_PutLogEvents.html
      total_bytesize = 0
      while event = events.shift
        event_bytesize = event[:message].bytesize + EVENT_HEADER_SIZE
        if MAX_EVENT_SIZE < event_bytesize
          log.warn "Log event is discarded because it is too large: #{event_bytesize} bytes exceeds limit of #{MAX_EVENT_SIZE}"
          # Fix: was `break`, which silently dropped every remaining event in
          # the batch; only the oversized event should be skipped.
          next
        end

        new_chunk = chunk + [event]

        # A batch of log events in a single request cannot span more than 24 hours.
        chunk_span_too_big = new_chunk.size > 1 && new_chunk[-1][:timestamp] - new_chunk[0][:timestamp] >= 1000 * 60 * 60 * 24
        chunk_too_big = total_bytesize + event_bytesize > MAX_EVENTS_SIZE
        chunk_too_long = @max_events_per_batch && chunk.size >= @max_events_per_batch
        if chunk_too_big or chunk_span_too_big or chunk_too_long
          put_events(group_name, stream_name, chunk, total_bytesize)
          chunk = [event]
          total_bytesize = event_bytesize
        else
          chunk << event
          total_bytesize += event_bytesize
        end
      end

      unless chunk.empty?
        put_events(group_name, stream_name, chunk, total_bytesize)
      end
    end

    # Calls PutLogEvents with the cached sequence token, retrying until it
    # succeeds. Handles token desync (refetch token), missing stream
    # (optionally recreate), and throttling (exponential backoff with jitter,
    # bounded by put_log_events_retry_limit unless disabled).
    def put_events(group_name, stream_name, events, events_bytesize)
      response = nil
      retry_count = 0

      until response
        args = {
          log_events: events,
          log_group_name: group_name,
          log_stream_name: stream_name,
        }

        token = next_sequence_token(group_name, stream_name)
        args[:sequence_token] = token if token

        begin
          t = Time.now
          response = @logs.put_log_events(args)
          log.debug "Called PutLogEvents API", {
            "group" => group_name,
            "stream" => stream_name,
            "events_count" => events.size,
            "events_bytesize" => events_bytesize,
            "sequence_token" => token,
            "thread" => Thread.current.object_id,
            "request_sec" => Time.now - t,
          }
          log.warn response.rejected_log_events_info if response.rejected_log_events_info != nil
        rescue Aws::CloudWatchLogs::Errors::InvalidSequenceTokenException, Aws::CloudWatchLogs::Errors::DataAlreadyAcceptedException => err
          sleep 1 # to avoid too many API calls
          # Our cached token is stale; refetch the authoritative one from AWS.
          log_stream = find_log_stream(group_name, stream_name)
          new_token = log_stream.upload_sequence_token
          store_next_sequence_token(group_name, stream_name, new_token)
          log.warn "updating upload sequence token forcefully because unrecoverable error occured", {
            "error" => err,
            "log_group" => group_name,
            "log_stream" => stream_name,
            # Fix: previously logged the stale `token`, not the refreshed one.
            "new_sequence_token" => new_token,
          }
        rescue Aws::CloudWatchLogs::Errors::ResourceNotFoundException => err
          if @auto_create_stream && err.message == 'The specified log stream does not exist.'
            log.warn 'Creating log stream because "The specified log stream does not exist." error is got', {
              "error" => err,
              "log_group" => group_name,
              "log_stream" => stream_name,
            }
            create_log_stream(group_name, stream_name)
            delete_sequence_token(group_name, stream_name)
          else
            raise err
          end
        rescue Aws::CloudWatchLogs::Errors::ThrottlingException => err
          if !@put_log_events_disable_retry_limit && @put_log_events_retry_limit < retry_count
            log.error "failed to PutLogEvents and discard logs because retry count exceeded put_log_events_retry_limit", {
              "error_class" => err.class.to_s,
              "error" => err.message,
            }
            return
          else
            # Exponential backoff with +/-12.5% jitter.
            sleep_sec = @put_log_events_retry_wait * (2 ** retry_count)
            sleep_sec += sleep_sec * (0.25 * (rand - 0.5))
            log.warn "failed to PutLogEvents", {
              "next_retry" => Time.now + sleep_sec,
              "error_class" => err.class.to_s,
              "error" => err.message,
            }
            sleep(sleep_sec)
            retry_count += 1
          end
        end
      end

      if 0 < retry_count
        log.warn "retry succeeded"
      end

      store_next_sequence_token(group_name, stream_name, response.next_sequence_token)
    end

    # Creates the log group, tolerating a concurrent creation by another worker.
    def create_log_group(group_name)
      begin
        @logs.create_log_group(log_group_name: group_name)
        @sequence_tokens[group_name] = {}
      rescue Aws::CloudWatchLogs::Errors::ResourceAlreadyExistsException
        log.debug "Log group '#{group_name}' already exists"
      end
    end

    # Creates the log stream, tolerating a concurrent creation by another
    # worker; a nil token marks a brand-new stream.
    def create_log_stream(group_name, stream_name)
      begin
        @logs.create_log_stream(log_group_name: group_name, log_stream_name: stream_name)
        @sequence_tokens[group_name] ||= {}
        @sequence_tokens[group_name][stream_name] = nil
      rescue Aws::CloudWatchLogs::Errors::ResourceAlreadyExistsException
        log.debug "Log stream '#{stream_name}' already exists"
      end
    end

    # True when the group is cached locally or visible via DescribeLogGroups
    # (paginated by the SDK); caches a hit.
    def log_group_exists?(group_name)
      if @sequence_tokens[group_name]
        true
      elsif @logs.describe_log_groups.any? {|page| page.log_groups.any? {|i| i.log_group_name == group_name } }
        @sequence_tokens[group_name] = {}
        true
      else
        false
      end
    end

    # True when the stream is cached or found via DescribeLogStreams; caches
    # its current upload sequence token on a hit.
    def log_stream_exists?(group_name, stream_name)
      if not @sequence_tokens[group_name]
        false
      elsif @sequence_tokens[group_name].has_key?(stream_name)
        true
      elsif (log_stream = find_log_stream(group_name, stream_name))
        @sequence_tokens[group_name][stream_name] = log_stream.upload_sequence_token
        true
      else
        false
      end
    end

    # Pages through DescribeLogStreams (prefix-filtered) looking for an exact
    # name match; returns the stream struct or nil. Sleeps briefly between
    # pages to stay under the API rate limit.
    def find_log_stream(group_name, stream_name)
      next_token = nil
      loop do
        response = @logs.describe_log_streams(log_group_name: group_name, log_stream_name_prefix: stream_name, next_token: next_token)
        if (log_stream = response.log_streams.find {|i| i.log_stream_name == stream_name })
          return log_stream
        end
        if response.next_token.nil?
          break
        end
        next_token = response.next_token
        sleep 0.1
      end
      nil
    end
  end
end
@@ -0,0 +1,157 @@
1
+ require 'test_helper'
2
+
3
# Integration tests for the cloudwatch_logs input plugin.
#
# NOTE(review): these appear to run against the real CloudWatch Logs API via
# helper methods from CloudwatchLogsTestHelper (create_log_stream,
# put_log_events, clear_log_group, aws_key_id, ...) — confirm required
# credentials/environment before running. The `sleep 5` calls wait for
# CloudWatch Logs' eventual consistency.
class CloudwatchLogsInputTest < Test::Unit::TestCase
  include CloudwatchLogsTestHelper

  def setup
    Fluent::Test.setup
    # Required here (not at file top) so Fluent::Test.setup runs first.
    require 'fluent/plugin/in_cloudwatch_logs'

  end

  def teardown
    # Remove the log group created by the test to avoid cross-test leakage.
    clear_log_group
  end

  # Every config_param should be readable back from the configured instance.
  def test_configure
    d = create_driver(<<-EOC)
      type cloudwatch_logs
      aws_key_id test_id
      aws_sec_key test_key
      region us-east-1
      tag test
      log_group_name group
      log_stream_name stream
      use_log_stream_name_prefix true
      state_file /tmp/state
    EOC

    assert_equal('test_id', d.instance.aws_key_id)
    assert_equal('test_key', d.instance.aws_sec_key)
    assert_equal('us-east-1', d.instance.region)
    assert_equal('test', d.instance.tag)
    assert_equal('group', d.instance.log_group_name)
    assert_equal('stream', d.instance.log_stream_name)
    assert_equal(true, d.instance.use_log_stream_name_prefix)
    assert_equal('/tmp/state', d.instance.state_file)
  end

  # JSON log events should be emitted as parsed records with second-precision
  # timestamps.
  def test_emit
    create_log_stream

    time_ms = (Time.now.to_f * 1000).floor
    put_log_events([
      {timestamp: time_ms, message: '{"cloudwatch":"logs1"}'},
      {timestamp: time_ms, message: '{"cloudwatch":"logs2"}'},
    ])

    sleep 5

    d = create_driver
    d.run do
      sleep 5
    end

    emits = d.emits
    assert_equal(2, emits.size)
    assert_equal(['test', (time_ms / 1000).floor, {'cloudwatch' => 'logs1'}], emits[0])
    assert_equal(['test', (time_ms / 1000).floor, {'cloudwatch' => 'logs2'}], emits[1])
  end

  # Non-JSON events should be parsed via the configured `format` regexp.
  # NOTE(review): method name looks like a typo for "test_emit_with_format";
  # left unchanged because the framework discovers tests by name.
  def test_emit_width_format
    create_log_stream

    time_ms = (Time.now.to_f * 1000).floor
    put_log_events([
      {timestamp: time_ms, message: 'logs1'},
      {timestamp: time_ms, message: 'logs2'},
    ])

    sleep 5

    d = create_driver(<<-EOC)
      tag test
      type cloudwatch_logs
      log_group_name #{log_group_name}
      log_stream_name #{log_stream_name}
      state_file /tmp/state
      format /^(?<cloudwatch>[^ ]*)?/
      #{aws_key_id}
      #{aws_sec_key}
      #{region}
    EOC

    d.run do
      sleep 5
    end

    emits = d.emits
    assert_equal(2, emits.size)
    assert_equal('test', emits[0][0])
    # With `format`, event time comes from ingestion, so allow a 10s delta.
    assert_in_delta((time_ms / 1000).floor, emits[0][1], 10)
    assert_equal({'cloudwatch' => 'logs1'}, emits[0][2])
    assert_equal('test', emits[1][0])
    assert_in_delta((time_ms / 1000).floor, emits[1][1], 10)
    assert_equal({'cloudwatch' => 'logs2'}, emits[1][2])
  end

  # With use_log_stream_name_prefix, events from every stream matching the
  # prefix should be emitted.
  def test_emit_with_prefix
    new_log_stream("testprefix")
    create_log_stream

    time_ms = (Time.now.to_f * 1000).floor
    put_log_events([
      {timestamp: time_ms, message: '{"cloudwatch":"logs1"}'},
      {timestamp: time_ms, message: '{"cloudwatch":"logs2"}'},
    ])

    # Second stream under the same prefix.
    new_log_stream("testprefix")
    create_log_stream
    put_log_events([
      {timestamp: time_ms, message: '{"cloudwatch":"logs3"}'},
      {timestamp: time_ms, message: '{"cloudwatch":"logs4"}'},
    ])

    sleep 5

    d = create_driver(<<-EOC)
      tag test
      type cloudwatch_logs
      log_group_name #{log_group_name}
      log_stream_name testprefix
      use_log_stream_name_prefix true
      state_file /tmp/state
      #{aws_key_id}
      #{aws_sec_key}
      #{region}
    EOC
    d.run do
      sleep 5
    end

    emits = d.emits
    assert_equal(4, emits.size)
    assert_equal(['test', (time_ms / 1000).floor, {'cloudwatch' => 'logs1'}], emits[0])
    assert_equal(['test', (time_ms / 1000).floor, {'cloudwatch' => 'logs2'}], emits[1])
    assert_equal(['test', (time_ms / 1000).floor, {'cloudwatch' => 'logs3'}], emits[2])
    assert_equal(['test', (time_ms / 1000).floor, {'cloudwatch' => 'logs4'}], emits[3])
  end

  private
  # Baseline config used by tests that don't override it; group/stream names
  # and credentials are interpolated from helper methods.
  def default_config
    <<-EOC
      tag test
      type cloudwatch_logs
      log_group_name #{log_group_name}
      log_stream_name #{log_stream_name}
      state_file /tmp/state
      #{aws_key_id}
      #{aws_sec_key}
      #{region}
    EOC
  end

  def create_driver(conf = default_config)
    Fluent::Test::InputTestDriver.new(Fluent::CloudwatchLogsInput).configure(conf)
  end
end