fluent-plugin-cloudwatch-logs-yajl 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,377 @@
1
+ require 'fluent/output'
2
+ require 'thread'
3
+
4
module Fluent
  require 'fluent/mixin/config_placeholders'

  # Buffered output plugin that ships Fluentd records to Amazon CloudWatch
  # Logs via the PutLogEvents API. Records are grouped by (log group, log
  # stream), batched under the documented API limits, and uploaded by a
  # configurable number of worker threads while tracking per-stream upload
  # sequence tokens.
  class CloudwatchLogsOutput < BufferedOutput
    Plugin.register_output('cloudwatch_logs', self)

    include Fluent::SetTimeKeyMixin
    include Fluent::Mixin::ConfigPlaceholders

    config_param :aws_key_id, :string, :default => nil, :secret => true
    config_param :aws_sec_key, :string, :default => nil, :secret => true
    config_param :region, :string, :default => nil
    config_param :log_group_name, :string, :default => nil
    config_param :log_stream_name, :string, :default => nil
    config_param :auto_create_stream, :bool, default: false
    config_param :message_keys, :string, :default => nil
    config_param :max_message_length, :integer, :default => nil
    config_param :max_events_per_batch, :integer, :default => 10000
    config_param :use_tag_as_group, :bool, :default => false # TODO: Rename to use_tag_as_group_name ?
    config_param :use_tag_as_stream, :bool, :default => false # TODO: Rename to use_tag_as_stream_name ?
    config_param :log_group_name_key, :string, :default => nil
    config_param :log_stream_name_key, :string, :default => nil
    config_param :remove_log_group_name_key, :bool, :default => false
    config_param :remove_log_stream_name_key, :bool, :default => false
    config_param :http_proxy, :string, default: nil
    config_param :put_log_events_retry_wait, :time, default: 1.0
    config_param :put_log_events_retry_limit, :integer, default: 17
    config_param :put_log_events_disable_retry_limit, :bool, default: false
    config_param :concurrency, :integer, default: 1

    # PutLogEvents API limits:
    # http://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_PutLogEvents.html
    MAX_EVENTS_SIZE = 1_048_576   # maximum batch payload, in bytes
    MAX_EVENT_SIZE = 256 * 1024   # maximum size of a single event, in bytes
    EVENT_HEADER_SIZE = 26        # per-event overhead the API adds when sizing a batch

    # Older Fluentd versions do not define #log on plugin instances.
    unless method_defined?(:log)
      define_method(:log) { $log }
    end

    def initialize
      super

      # Required lazily so the SDK is only loaded when the plugin is used.
      require 'aws-sdk-cloudwatchlogs'
    end

    def placeholders
      [:percent]
    end

    # Validates that exactly one source for the group name and exactly one
    # source for the stream name is configured.
    #
    # @raise [ConfigError] when zero or multiple sources are set
    def configure(conf)
      super

      unless [conf['log_group_name'], conf['use_tag_as_group'], conf['log_group_name_key']].compact.size == 1
        raise ConfigError, "Set only one of log_group_name, use_tag_as_group and log_group_name_key"
      end

      unless [conf['log_stream_name'], conf['use_tag_as_stream'], conf['log_stream_name_key']].compact.size == 1
        raise ConfigError, "Set only one of log_stream_name, use_tag_as_stream and log_stream_name_key"
      end
    end

    def start
      super

      options = {}
      options[:credentials] = Aws::Credentials.new(@aws_key_id, @aws_sec_key) if @aws_key_id && @aws_sec_key
      options[:region] = @region if @region
      options[:http_proxy] = @http_proxy if @http_proxy
      @logs ||= Aws::CloudWatchLogs::Client.new(options)
      # group_name => { stream_name => upload_sequence_token }
      @sequence_tokens = {}
      @store_next_sequence_token_mutex = Mutex.new
    end

    def format(tag, time, record)
      [tag, time, record].to_msgpack
    end

    # Converts the buffered chunk into PutLogEvents batches and uploads them
    # using @concurrency worker threads fed through a queue.
    def write(chunk)
      queue = Thread::Queue.new

      chunk.enum_for(:msgpack_each).select {|tag, time, record|
        if record.nil?
          log.warn "record is nil (tag=#{tag})"
          false
        else
          true
        end
      }.group_by {|tag, time, record|
        group = case
                when @use_tag_as_group
                  tag
                when @log_group_name_key
                  if @remove_log_group_name_key
                    record.delete(@log_group_name_key)
                  else
                    record[@log_group_name_key]
                  end
                else
                  @log_group_name
                end

        stream = case
                 when @use_tag_as_stream
                   tag
                 when @log_stream_name_key
                   if @remove_log_stream_name_key
                     record.delete(@log_stream_name_key)
                   else
                     record[@log_stream_name_key]
                   end
                 else
                   @log_stream_name
                 end

        [group, stream]
      }.each {|group_stream, rs|
        group_name, stream_name = group_stream

        if stream_name.nil?
          log.warn "stream_name is nil (group_name=#{group_name})"
          next
        end

        unless log_group_exists?(group_name)
          if @auto_create_stream
            create_log_group(group_name)
          else
            log.warn "Log group '#{group_name}' does not exist"
            next
          end
        end

        unless log_stream_exists?(group_name, stream_name)
          if @auto_create_stream
            create_log_stream(group_name, stream_name)
          else
            log.warn "Log stream '#{stream_name}' does not exist"
            next
          end
        end

        events = []
        rs.each do |t, time, record|
          # CloudWatch expects millisecond timestamps.
          time_ms = time * 1000

          scrub_record!(record)
          if @message_keys
            message = @message_keys.split(',').map {|k| record[k].to_s }.join(' ')
          else
            message = Yajl.dump(record)
          end

          if @max_message_length
            message = message.slice(0, @max_message_length)
          end

          events << {timestamp: time_ms, message: message}
        end
        # The log events in the batch must be in chronological ordered by their timestamp.
        # http://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_PutLogEvents.html
        events = events.sort_by {|e| e[:timestamp] }

        queue << [group_name, stream_name, events]
      }

      # One nil sentinel per worker so every thread terminates its loop.
      @concurrency.times do
        queue << nil
      end
      threads = @concurrency.times.map do |i|
        Thread.start do
          while job = queue.shift
            group_name, stream_name, events = job
            put_events_by_chunk(group_name, stream_name, events)
          end
        end
      end
      threads.each(&:join)
    end

    private
    # Recursively replaces invalid byte sequences in every String inside the
    # record so it can be serialized without encoding errors.
    def scrub_record!(record)
      case record
      when Hash
        record.each_value {|v| scrub_record!(v) }
      when Array
        record.each {|v| scrub_record!(v) }
      when String
        record.scrub!
      end
    end

    def delete_sequence_token(group_name, stream_name)
      @sequence_tokens[group_name].delete(stream_name)
    end

    def next_sequence_token(group_name, stream_name)
      # dig avoids a NoMethodError when no token has been cached for the
      # group yet (the original indexed an absent group hash directly).
      @sequence_tokens.dig(group_name, stream_name)
    end

    def store_next_sequence_token(group_name, stream_name, token)
      @store_next_sequence_token_mutex.synchronize do
        # Create the per-group hash on demand instead of assuming it exists.
        (@sequence_tokens[group_name] ||= {})[stream_name] = token
      end
    end

    # Splits +events+ (already sorted by timestamp) into batches that respect
    # the PutLogEvents limits and uploads each batch. +events+ is consumed
    # destructively via #shift.
    def put_events_by_chunk(group_name, stream_name, events)
      chunk = []

      # The maximum batch size is 1,048,576 bytes, and this size is calculated as the sum of all event messages in UTF-8, plus 26 bytes for each log event.
      # http://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_PutLogEvents.html
      total_bytesize = 0
      while event = events.shift
        event_bytesize = event[:message].bytesize + EVENT_HEADER_SIZE
        if MAX_EVENT_SIZE < event_bytesize
          log.warn "Log event is discarded because it is too large: #{event_bytesize} bytes exceeds limit of #{MAX_EVENT_SIZE}"
          # BUGFIX: skip only the oversized event. The original used `break`,
          # which silently discarded every remaining event in the batch.
          next
        end

        new_chunk = chunk + [event]

        # A single batch may not span 24 hours or more.
        chunk_span_too_big = new_chunk.size > 1 && new_chunk[-1][:timestamp] - new_chunk[0][:timestamp] >= 1000 * 60 * 60 * 24
        chunk_too_big = total_bytesize + event_bytesize > MAX_EVENTS_SIZE
        chunk_too_long = @max_events_per_batch && chunk.size >= @max_events_per_batch
        if chunk_too_big or chunk_span_too_big or chunk_too_long
          put_events(group_name, stream_name, chunk, total_bytesize)
          chunk = [event]
          total_bytesize = event_bytesize
        else
          chunk << event
          total_bytesize += event_bytesize
        end
      end

      unless chunk.empty?
        put_events(group_name, stream_name, chunk, total_bytesize)
      end
    end

    # Calls PutLogEvents for one prepared batch, recovering from sequence
    # token races, missing streams (with auto_create_stream) and throttling
    # (exponential backoff, bounded by put_log_events_retry_limit unless
    # disabled).
    def put_events(group_name, stream_name, events, events_bytesize)
      response = nil
      retry_count = 0

      until response
        args = {
          log_events: events,
          log_group_name: group_name,
          log_stream_name: stream_name,
        }

        token = next_sequence_token(group_name, stream_name)
        args[:sequence_token] = token if token

        begin
          t = Time.now
          response = @logs.put_log_events(args)
          log.debug "Called PutLogEvents API", {
            "group" => group_name,
            "stream" => stream_name,
            "events_count" => events.size,
            "events_bytesize" => events_bytesize,
            "sequence_token" => token,
            "thread" => Thread.current.object_id,
            "request_sec" => Time.now - t,
          }
          log.warn response.rejected_log_events_info if response.rejected_log_events_info != nil
        rescue Aws::CloudWatchLogs::Errors::InvalidSequenceTokenException, Aws::CloudWatchLogs::Errors::DataAlreadyAcceptedException => err
          sleep 1 # to avoid too many API calls
          log_stream = find_log_stream(group_name, stream_name)
          new_token = log_stream.upload_sequence_token
          store_next_sequence_token(group_name, stream_name, new_token)
          log.warn "updating upload sequence token forcefully because unrecoverable error occured", {
            "error" => err,
            "log_group" => group_name,
            "log_stream" => stream_name,
            # BUGFIX: report the freshly fetched token; the original logged
            # the stale token that triggered this error.
            "new_sequence_token" => new_token,
          }
        rescue Aws::CloudWatchLogs::Errors::ResourceNotFoundException => err
          if @auto_create_stream && err.message == 'The specified log stream does not exist.'
            log.warn 'Creating log stream because "The specified log stream does not exist." error is got', {
              "error" => err,
              "log_group" => group_name,
              "log_stream" => stream_name,
            }
            create_log_stream(group_name, stream_name)
            delete_sequence_token(group_name, stream_name)
          else
            raise err
          end
        rescue Aws::CloudWatchLogs::Errors::ThrottlingException => err
          if !@put_log_events_disable_retry_limit && @put_log_events_retry_limit < retry_count
            log.error "failed to PutLogEvents and discard logs because retry count exceeded put_log_events_retry_limit", {
              "error_class" => err.class.to_s,
              "error" => err.message,
            }
            return
          else
            # Exponential backoff with +/-12.5% jitter.
            sleep_sec = @put_log_events_retry_wait * (2 ** retry_count)
            sleep_sec += sleep_sec * (0.25 * (rand - 0.5))
            log.warn "failed to PutLogEvents", {
              "next_retry" => Time.now + sleep_sec,
              "error_class" => err.class.to_s,
              "error" => err.message,
            }
            sleep(sleep_sec)
            retry_count += 1
          end
        end
      end

      if 0 < retry_count
        log.warn "retry succeeded"
      end

      store_next_sequence_token(group_name, stream_name, response.next_sequence_token)
    end

    def create_log_group(group_name)
      begin
        @logs.create_log_group(log_group_name: group_name)
        @sequence_tokens[group_name] = {}
      rescue Aws::CloudWatchLogs::Errors::ResourceAlreadyExistsException
        log.debug "Log group '#{group_name}' already exists"
      end
    end

    def create_log_stream(group_name, stream_name)
      begin
        @logs.create_log_stream(log_group_name: group_name, log_stream_name: stream_name)
        @sequence_tokens[group_name] ||= {}
        @sequence_tokens[group_name][stream_name] = nil
      rescue Aws::CloudWatchLogs::Errors::ResourceAlreadyExistsException
        log.debug "Log stream '#{stream_name}' already exists"
      end
    end

    # True when the group is cached locally or visible via DescribeLogGroups;
    # caches an empty token hash on first sight.
    def log_group_exists?(group_name)
      if @sequence_tokens[group_name]
        true
      elsif @logs.describe_log_groups.any? {|page| page.log_groups.any? {|i| i.log_group_name == group_name } }
        @sequence_tokens[group_name] = {}
        true
      else
        false
      end
    end

    # True when the stream is cached locally or found via DescribeLogStreams;
    # caches its upload_sequence_token on first sight.
    def log_stream_exists?(group_name, stream_name)
      if not @sequence_tokens[group_name]
        false
      elsif @sequence_tokens[group_name].has_key?(stream_name)
        true
      elsif (log_stream = find_log_stream(group_name, stream_name))
        @sequence_tokens[group_name][stream_name] = log_stream.upload_sequence_token
        true
      else
        false
      end
    end

    # Pages through DescribeLogStreams (prefix-filtered) until an exact name
    # match is found; returns nil when the stream does not exist.
    def find_log_stream(group_name, stream_name)
      next_token = nil
      loop do
        response = @logs.describe_log_streams(log_group_name: group_name, log_stream_name_prefix: stream_name, next_token: next_token)
        if (log_stream = response.log_streams.find {|i| i.log_stream_name == stream_name })
          return log_stream
        end
        if response.next_token.nil?
          break
        end
        next_token = response.next_token
        sleep 0.1 # throttle pagination to stay under API rate limits
      end
      nil
    end
  end
end
@@ -0,0 +1,157 @@
1
+ require 'test_helper'
2
+
3
# Integration-style tests for Fluent::CloudwatchLogsInput. These exercise
# the input plugin against the real CloudWatch Logs API through the helpers
# provided by CloudwatchLogsTestHelper (log group/stream setup, credentials).
class CloudwatchLogsInputTest < Test::Unit::TestCase
  include CloudwatchLogsTestHelper

  def setup
    Fluent::Test.setup
    # Required here (not at file top) so Fluent::Test.setup runs first.
    require 'fluent/plugin/in_cloudwatch_logs'
  end

  def teardown
    clear_log_group
  end

  # Verifies that every config parameter is parsed into the expected value.
  def test_configure
    d = create_driver(<<-EOC)
      type cloudwatch_logs
      aws_key_id test_id
      aws_sec_key test_key
      region us-east-1
      tag test
      log_group_name group
      log_stream_name stream
      use_log_stream_name_prefix true
      state_file /tmp/state
    EOC

    assert_equal('test_id', d.instance.aws_key_id)
    assert_equal('test_key', d.instance.aws_sec_key)
    assert_equal('us-east-1', d.instance.region)
    assert_equal('test', d.instance.tag)
    assert_equal('group', d.instance.log_group_name)
    assert_equal('stream', d.instance.log_stream_name)
    assert_equal(true, d.instance.use_log_stream_name_prefix)
    assert_equal('/tmp/state', d.instance.state_file)
  end

  # JSON events should be emitted as parsed records with second-resolution
  # timestamps.
  def test_emit
    create_log_stream

    time_ms = (Time.now.to_f * 1000).floor
    put_log_events([
      {timestamp: time_ms, message: '{"cloudwatch":"logs1"}'},
      {timestamp: time_ms, message: '{"cloudwatch":"logs2"}'},
    ])

    sleep 5

    d = create_driver
    d.run do
      sleep 5
    end

    emits = d.emits
    assert_equal(2, emits.size)
    assert_equal(['test', (time_ms / 1000).floor, {'cloudwatch' => 'logs1'}], emits[0])
    assert_equal(['test', (time_ms / 1000).floor, {'cloudwatch' => 'logs2'}], emits[1])
  end

  # Plain-text events should be parsed with the configured `format` regexp.
  # BUGFIX: method renamed from the original typo `test_emit_width_format`.
  def test_emit_with_format
    create_log_stream

    time_ms = (Time.now.to_f * 1000).floor
    put_log_events([
      {timestamp: time_ms, message: 'logs1'},
      {timestamp: time_ms, message: 'logs2'},
    ])

    sleep 5

    d = create_driver(<<-EOC)
      tag test
      type cloudwatch_logs
      log_group_name #{log_group_name}
      log_stream_name #{log_stream_name}
      state_file /tmp/state
      format /^(?<cloudwatch>[^ ]*)?/
      #{aws_key_id}
      #{aws_sec_key}
      #{region}
    EOC

    d.run do
      sleep 5
    end

    emits = d.emits
    assert_equal(2, emits.size)
    assert_equal('test', emits[0][0])
    assert_in_delta((time_ms / 1000).floor, emits[0][1], 10)
    assert_equal({'cloudwatch' => 'logs1'}, emits[0][2])
    assert_equal('test', emits[1][0])
    assert_in_delta((time_ms / 1000).floor, emits[1][1], 10)
    assert_equal({'cloudwatch' => 'logs2'}, emits[1][2])
  end

  # With use_log_stream_name_prefix, events from every stream matching the
  # prefix should be emitted.
  def test_emit_with_prefix
    new_log_stream("testprefix")
    create_log_stream

    time_ms = (Time.now.to_f * 1000).floor
    put_log_events([
      {timestamp: time_ms, message: '{"cloudwatch":"logs1"}'},
      {timestamp: time_ms, message: '{"cloudwatch":"logs2"}'},
    ])

    new_log_stream("testprefix")
    create_log_stream
    put_log_events([
      {timestamp: time_ms, message: '{"cloudwatch":"logs3"}'},
      {timestamp: time_ms, message: '{"cloudwatch":"logs4"}'},
    ])

    sleep 5

    d = create_driver(<<-EOC)
      tag test
      type cloudwatch_logs
      log_group_name #{log_group_name}
      log_stream_name testprefix
      use_log_stream_name_prefix true
      state_file /tmp/state
      #{aws_key_id}
      #{aws_sec_key}
      #{region}
    EOC
    d.run do
      sleep 5
    end

    emits = d.emits
    assert_equal(4, emits.size)
    assert_equal(['test', (time_ms / 1000).floor, {'cloudwatch' => 'logs1'}], emits[0])
    assert_equal(['test', (time_ms / 1000).floor, {'cloudwatch' => 'logs2'}], emits[1])
    assert_equal(['test', (time_ms / 1000).floor, {'cloudwatch' => 'logs3'}], emits[2])
    assert_equal(['test', (time_ms / 1000).floor, {'cloudwatch' => 'logs4'}], emits[3])
  end

  private
  # Default driver configuration pointing at the helper-provided group/stream.
  def default_config
    <<-EOC
      tag test
      type cloudwatch_logs
      log_group_name #{log_group_name}
      log_stream_name #{log_stream_name}
      state_file /tmp/state
      #{aws_key_id}
      #{aws_sec_key}
      #{region}
    EOC
  end

  def create_driver(conf = default_config)
    Fluent::Test::InputTestDriver.new(Fluent::CloudwatchLogsInput).configure(conf)
  end
end