fluent-plugin-test 0.0.17 → 0.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,574 @@
1
+ require 'fluent/plugin/output'
2
+ require 'fluent/msgpack_factory'
3
+ require 'thread'
4
+ require 'yajl'
5
+
6
module Fluent::Plugin
  # Buffered output plugin that forwards records to AWS CloudWatch Logs via
  # the PutLogEvents API, optionally creating log groups and streams on demand.
  class CloudwatchLogsOutput < Output
    Fluent::Plugin.register_output('cloudwatch_logs', self)

    # Raised when a single event exceeds this plugin's per-event size cap.
    # Subclassing Fluent::UnrecoverableError makes Fluentd drop the chunk
    # instead of retrying it forever.
    class TooLargeEventError < Fluent::UnrecoverableError; end

    helpers :compat_parameters, :inject, :formatter

    DEFAULT_BUFFER_TYPE = "memory"

    # --- AWS credentials, region and endpoint ---
    config_param :aws_key_id, :string, :default => nil, :secret => true
    config_param :aws_sec_key, :string, :default => nil, :secret => true
    config_param :aws_instance_profile_credentials_retries, :integer, default: nil
    config_param :aws_use_sts, :bool, default: false
    config_param :aws_sts_role_arn, :string, default: nil
    config_param :aws_sts_session_name, :string, default: 'fluentd'
    config_param :aws_sts_external_id, :string, default: nil
    config_param :aws_sts_policy, :string, default: nil
    config_param :aws_sts_duration_seconds, :time, default: nil
    config_param :aws_sts_endpoint_url, :string, default: nil
    config_param :aws_ecs_authentication, :bool, default: false
    config_param :region, :string, :default => nil
    config_param :endpoint, :string, :default => nil
    config_param :ssl_verify_peer, :bool, :default => true

    # --- Destination selection: exactly one group source and one stream
    # --- source must be configured (validated in #configure) ---
    config_param :log_group_name, :string, :default => nil
    config_param :log_stream_name, :string, :default => nil
    config_param :auto_create_stream, :bool, default: false
    config_param :message_keys, :array, :default => [], value_type: :string
    config_param :max_message_length, :integer, :default => nil
    config_param :max_events_per_batch, :integer, :default => 10000
    config_param :use_tag_as_group, :bool, :default => false # TODO: Rename to use_tag_as_group_name ?
    config_param :use_tag_as_stream, :bool, :default => false # TODO: Rename to use_tag_as_stream_name ?
    config_param :log_group_name_key, :string, :default => nil
    config_param :log_stream_name_key, :string, :default => nil
    config_param :remove_log_group_name_key, :bool, :default => false
    config_param :remove_log_stream_name_key, :bool, :default => false
    config_param :http_proxy, :string, default: nil

    # --- PutLogEvents retry / concurrency behaviour (see #put_events) ---
    config_param :put_log_events_retry_wait, :time, default: 1.0
    config_param :put_log_events_retry_limit, :integer, default: 17
    config_param :put_log_events_disable_retry_limit, :bool, default: false
    config_param :concurrency, :integer, default: 1

    # --- Log-group tagging and retention (applied when creating groups) ---
    config_param :log_group_aws_tags, :hash, default: nil
    config_param :log_group_aws_tags_key, :string, default: nil
    config_param :remove_log_group_aws_tags_key, :bool, default: false
    config_param :retention_in_days, :integer, default: nil
    config_param :retention_in_days_key, :string, default: nil
    config_param :remove_retention_in_days_key, :bool, default: false
    config_param :json_handler, :enum, list: [:yajl, :json], :default => :yajl
    config_param :log_rejected_request, :bool, :default => false

    # Optional STS web-identity (e.g. IRSA) credentials.
    config_section :web_identity_credentials, multi: false do
      config_param :role_arn, :string
      config_param :role_session_name, :string
      config_param :web_identity_token_file, :string, default: nil #required
      config_param :policy, :string, default: nil
      config_param :duration_seconds, :time, default: nil
    end

    config_section :buffer do
      config_set_default :@type, DEFAULT_BUFFER_TYPE
    end
    config_section :format do
      config_set_default :@type, 'json'
    end

    # Batch limit from the PutLogEvents API: 1 MiB total, counted as message
    # bytes plus EVENT_HEADER_SIZE bytes of overhead per event.
    MAX_EVENTS_SIZE = 1_048_576
    # Per-event size cap enforced by this plugin (message + header bytes).
    MAX_EVENT_SIZE = 1024 * 1024
    EVENT_HEADER_SIZE = 26
73
+
74
    # Load the AWS SDK lazily at instantiation time so that parsing the
    # plugin file alone does not require the gem to be present.
    def initialize
      super

      require 'aws-sdk-cloudwatchlogs'
    end
79
+
80
+ def configure(conf)
81
+ compat_parameters_convert(conf, :buffer, :inject)
82
+ super
83
+
84
+ unless [conf['log_group_name'], conf['use_tag_as_group'], conf['log_group_name_key']].compact.size == 1
85
+ raise Fluent::ConfigError, "Set only one of log_group_name, use_tag_as_group and log_group_name_key"
86
+ end
87
+
88
+ unless [conf['log_stream_name'], conf['use_tag_as_stream'], conf['log_stream_name_key']].compact.size == 1
89
+ raise Fluent::ConfigError, "Set only one of log_stream_name, use_tag_as_stream and log_stream_name_key"
90
+ end
91
+
92
+ if [conf['log_group_aws_tags'], conf['log_group_aws_tags_key']].compact.size > 1
93
+ raise ConfigError, "Set only one of log_group_aws_tags, log_group_aws_tags_key"
94
+ end
95
+
96
+ if [conf['retention_in_days'], conf['retention_in_days_key']].compact.size > 1
97
+ raise ConfigError, "Set only one of retention_in_days, retention_in_days_key"
98
+ end
99
+
100
+ formatter_conf = conf.elements('format').first
101
+ @formatter_proc = unless formatter_conf
102
+ unless @message_keys.empty?
103
+ Proc.new { |tag, time, record|
104
+ @message_keys.map{|k| record[k].to_s }.reject{|e| e.empty? }.join(' ')
105
+ }
106
+ else
107
+ Proc.new { |tag, time, record|
108
+ @json_handler.dump(record)
109
+ }
110
+ end
111
+ else
112
+ formatter = formatter_create(usage: 'cloudwatch-logs-plugin', conf: formatter_conf)
113
+ formatter.method(:format)
114
+ end
115
+ end
116
+
117
    # Build the CloudWatch Logs client with the configured credential strategy
    # (STS assume-role, web identity, ECS task credentials, static keys, or
    # the SDK default chain) and initialize the sequence-token cache.
    def start
      super

      options = {}
      options[:logger] = log if log
      options[:log_level] = :debug if log
      options[:region] = @region if @region
      options[:endpoint] = @endpoint if @endpoint
      options[:ssl_verify_peer] = @ssl_verify_peer
      options[:instance_profile_credentials_retries] = @aws_instance_profile_credentials_retries if @aws_instance_profile_credentials_retries

      if @aws_use_sts
        Aws.config[:region] = options[:region]
        credentials_options = {
          role_arn: @aws_sts_role_arn,
          role_session_name: @aws_sts_session_name,
          external_id: @aws_sts_external_id,
          policy: @aws_sts_policy,
          duration_seconds: @aws_sts_duration_seconds
        }
        credentials_options[:sts_endpoint_url] = @aws_sts_endpoint_url if @aws_sts_endpoint_url
        # Pin the STS client itself to the configured region/endpoint when given.
        if @region and @aws_sts_endpoint_url
          credentials_options[:client] = Aws::STS::Client.new(:region => @region, endpoint: @aws_sts_endpoint_url)
        elsif @region
          credentials_options[:client] = Aws::STS::Client.new(:region => @region)
        end
        options[:credentials] = Aws::AssumeRoleCredentials.new(credentials_options)
      elsif @web_identity_credentials
        c = @web_identity_credentials
        credentials_options = {}
        credentials_options[:role_arn] = c.role_arn
        credentials_options[:role_session_name] = c.role_session_name
        credentials_options[:web_identity_token_file] = c.web_identity_token_file
        credentials_options[:policy] = c.policy if c.policy
        credentials_options[:duration_seconds] = c.duration_seconds if c.duration_seconds
        if @region
          credentials_options[:client] = Aws::STS::Client.new(:region => @region)
        end
        options[:credentials] = Aws::AssumeRoleWebIdentityCredentials.new(credentials_options)
      elsif @aws_ecs_authentication
        # collect AWS credential from ECS relative uri ENV variable
        aws_container_credentials_relative_uri = ENV["AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"]
        options[:credentials] = Aws::ECSCredentials.new({credential_path: aws_container_credentials_relative_uri}).credentials
      else
        # Static keys when both are given; otherwise leave :credentials unset
        # so the SDK falls back to its default provider chain.
        options[:credentials] = Aws::Credentials.new(@aws_key_id, @aws_sec_key) if @aws_key_id && @aws_sec_key
      end
      options[:http_proxy] = @http_proxy if @http_proxy
      @logs ||= Aws::CloudWatchLogs::Client.new(options)
      # {group_name => {stream_name => sequence_token}} cache shared by the
      # uploader threads; writes go through @store_next_sequence_token_mutex.
      @sequence_tokens = {}
      @store_next_sequence_token_mutex = Mutex.new

      log.debug "Aws::CloudWatchLogs::Client initialized: log.level #{log.level} => #{options[:log_level]}"

      # Replace the configured symbol with the actual JSON backend module.
      @json_handler = case @json_handler
                      when :yajl
                        Yajl
                      when :json
                        JSON
                      end
    end
177
+
178
    # Buffer-stage serialization: inject configured fields into the record and
    # pack the (tag, time, record) triple as msgpack for chunk storage.
    def format(tag, time, record)
      record = inject_values_to_record(tag, time, record)
      Fluent::MessagePackFactory.msgpack_packer.pack([tag, time, record]).to_s
    end
182
+
183
    # Chunks written by #format contain raw msgpack, not text.
    def formatted_to_msgpack_binary?
      true
    end
186
+
187
    # This output is safe to run under Fluentd's multi-worker mode.
    def multi_workers_ready?
      true
    end
190
+
191
    # Buffer flush: deserialize the chunk, group events by their destination
    # (log group, log stream) pair, ensure each destination exists (optionally
    # creating it), convert records into PutLogEvents payloads, and upload the
    # batches with @concurrency worker threads fed through a queue.
    def write(chunk)
      log_group_name = extract_placeholders(@log_group_name, chunk) if @log_group_name
      log_stream_name = extract_placeholders(@log_stream_name, chunk) if @log_stream_name
      # NOTE(review): Hash#each returns its receiver, so aws_tags aliases
      # @log_group_aws_tags; adding placeholder-expanded keys into the hash
      # while iterating it can raise "can't add a new key into hash during
      # iteration" when a tag KEY contains a placeholder — confirm
      # placeholders are only used in values.
      aws_tags = @log_group_aws_tags.each {|k, v|
        @log_group_aws_tags[extract_placeholders(k, chunk)] = extract_placeholders(v, chunk)
      } if @log_group_aws_tags

      queue = Thread::Queue.new

      # Drop nil records, then bucket the rest by [group, stream].
      chunk.enum_for(:msgpack_each).select {|tag, time, record|
        if record.nil?
          log.warn "record is nil (tag=#{tag})"
          false
        else
          true
        end
      }.group_by {|tag, time, record|
        # Group name comes from the tag, a record key (optionally removed
        # from the record), or the static/placeholder configuration.
        group = case
                when @use_tag_as_group
                  tag
                when @log_group_name_key
                  if @remove_log_group_name_key
                    record.delete(@log_group_name_key)
                  else
                    record[@log_group_name_key]
                  end
                else
                  log_group_name
                end

        # Stream name is resolved the same way.
        stream = case
                 when @use_tag_as_stream
                   tag
                 when @log_stream_name_key
                   if @remove_log_stream_name_key
                     record.delete(@log_stream_name_key)
                   else
                     record[@log_stream_name_key]
                   end
                 else
                   log_stream_name
                 end

        [group, stream]
      }.each {|group_stream, rs|
        group_name, stream_name = group_stream

        if stream_name.nil?
          log.warn "stream_name is nil (group_name=#{group_name})"
          next
        end

        unless log_group_exists?(group_name)
          #rs = [[name, timestamp, record],[name,timestamp,record]]
          #get tags and retention from first record
          #as we create log group only once, values from first record will persist
          record = rs[0][2]

          awstags = aws_tags
          unless @log_group_aws_tags_key.nil?
            if @remove_log_group_aws_tags_key
              awstags = record.delete(@log_group_aws_tags_key)
            else
              awstags = record[@log_group_aws_tags_key]
            end
          end

          retention_in_days = @retention_in_days
          unless @retention_in_days_key.nil?
            if @remove_retention_in_days_key
              retention_in_days = record.delete(@retention_in_days_key)
            else
              retention_in_days = record[@retention_in_days_key]
            end
          end

          if @auto_create_stream
            create_log_group(group_name, awstags, retention_in_days)
          else
            log.warn "Log group '#{group_name}' does not exist"
            next
          end
        end

        unless log_stream_exists?(group_name, stream_name)
          if @auto_create_stream
            create_log_stream(group_name, stream_name)
          else
            log.warn "Log stream '#{stream_name}' does not exist"
            next
          end
        end

        events = []
        rs.each do |t, time, record|
          # Strip the bookkeeping keys (tags / retention) from every record
          # when the remove_* options are set.
          if @log_group_aws_tags_key && @remove_log_group_aws_tags_key
            record.delete(@log_group_aws_tags_key)
          end

          if @retention_in_days_key && @remove_retention_in_days_key
            record.delete(@retention_in_days_key)
          end

          record = drop_empty_record(record)

          # CloudWatch timestamps are milliseconds since epoch.
          time_ms = (time.to_f * 1000).floor

          scrub_record!(record)
          message = @formatter_proc.call(t, time, record)

          if message.empty?
            log.warn "Within specified message_key(s): (#{@message_keys.join(',')}) do not have non-empty record. Skip."
            next
          end

          if @max_message_length
            message = message.slice(0, @max_message_length)
          end

          events << {timestamp: time_ms, message: message}
        end
        # The log events in the batch must be in chronological ordered by their timestamp.
        # http://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_PutLogEvents.html
        events = events.sort_by {|e| e[:timestamp] }

        queue << [group_name, stream_name, events]
      }

      # One nil sentinel per worker so every thread terminates its loop.
      @concurrency.times do
        queue << nil
      end
      threads = @concurrency.times.map do |i|
        Thread.start do
          while job = queue.shift
            group_name, stream_name, events = job
            put_events_by_chunk(group_name, stream_name, events)
          end
        end
      end
      threads.each(&:join)
    end
332
+
333
+ private
334
+
335
+ def drop_empty_record(record)
336
+ new_record = record.dup
337
+ new_record.each_key do |k|
338
+ if new_record[k] == ""
339
+ new_record.delete(k)
340
+ end
341
+ end
342
+ new_record
343
+ end
344
+
345
+ def scrub_record!(record)
346
+ case record
347
+ when Hash
348
+ record.each_value {|v| scrub_record!(v) }
349
+ when Array
350
+ record.each {|v| scrub_record!(v) }
351
+ when String
352
+ # The AWS API requires UTF-8 encoding
353
+ # https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/CloudWatchLogsConcepts.html
354
+ record.force_encoding('UTF-8')
355
+ record.scrub!
356
+ end
357
+ end
358
+
359
    # Forget the cached sequence token for +stream_name+ so the next
    # PutLogEvents call re-resolves it (used after recreating a stream).
    def delete_sequence_token(group_name, stream_name)
      @sequence_tokens[group_name].delete(stream_name)
    end
362
+
363
    # Cached upload sequence token for the stream, or nil when none is known.
    def next_sequence_token(group_name, stream_name)
      @sequence_tokens[group_name][stream_name]
    end
366
+
367
    # Record +token+ as the stream's next sequence token. Synchronized because
    # several uploader threads (see #write) share the cache.
    def store_next_sequence_token(group_name, stream_name, token)
      @store_next_sequence_token_mutex.synchronize do
        @sequence_tokens[group_name][stream_name] = token
      end
    end
372
+
373
    # Split +events+ (already sorted by timestamp; consumed destructively via
    # shift) into PutLogEvents batches that honour the API limits, calling
    # put_events once per batch:
    #   - at most MAX_EVENTS_SIZE total bytes (message + 26-byte header each)
    #   - events in one batch span less than 24 hours
    #   - at most @max_events_per_batch events
    # Raises TooLargeEventError (unrecoverable) for a single over-sized event.
    def put_events_by_chunk(group_name, stream_name, events)
      chunk = []

      # The maximum batch size is 1,048,576 bytes, and this size is calculated as the sum of all event messages in UTF-8, plus 26 bytes for each log event.
      # http://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_PutLogEvents.html
      total_bytesize = 0
      while event = events.shift
        event_bytesize = event[:message].bytesize + EVENT_HEADER_SIZE
        if MAX_EVENT_SIZE < event_bytesize
          raise TooLargeEventError, "Log event in #{group_name} is discarded because it is too large: #{event_bytesize} bytes exceeds limit of #{MAX_EVENT_SIZE}"
        end

        new_chunk = chunk + [event]

        # Events are sorted, so first/last bound the batch's time span.
        chunk_span_too_big = new_chunk.size > 1 && new_chunk[-1][:timestamp] - new_chunk[0][:timestamp] >= 1000 * 60 * 60 * 24
        chunk_too_big = total_bytesize + event_bytesize > MAX_EVENTS_SIZE
        chunk_too_long = @max_events_per_batch && chunk.size >= @max_events_per_batch
        if chunk_too_big or chunk_span_too_big or chunk_too_long
          # Flush the accumulated batch and start a new one with this event.
          put_events(group_name, stream_name, chunk, total_bytesize)
          chunk = [event]
          total_bytesize = event_bytesize
        else
          chunk << event
          total_bytesize += event_bytesize
        end
      end

      # Flush the final partial batch.
      unless chunk.empty?
        put_events(group_name, stream_name, chunk, total_bytesize)
      end
    end
404
+
405
    # Issue one PutLogEvents call for a prepared batch, looping until the API
    # accepts it. Known failure modes handled in the rescue chain:
    #   - InvalidSequenceToken / DataAlreadyAccepted: adopt the token the API
    #     reports and retry
    #   - ResourceNotFound: optionally recreate the stream and retry
    #   - Throttling: exponential backoff bounded by put_log_events_retry_limit
    #     (or unbounded when put_log_events_disable_retry_limit is set)
    # On success the stream's next sequence token is cached.
    def put_events(group_name, stream_name, events, events_bytesize)
      response = nil
      retry_count = 0

      until response
        args = {
          log_events: events,
          log_group_name: group_name,
          log_stream_name: stream_name,
        }

        token = next_sequence_token(group_name, stream_name)
        args[:sequence_token] = token if token

        begin
          t = Time.now
          response = @logs.put_log_events(args)
          request = {
            "group" => group_name,
            "stream" => stream_name,
            "events_count" => events.size,
            "events_bytesize" => events_bytesize,
            "sequence_token" => token,
            "thread" => Thread.current.object_id,
            "request_sec" => Time.now - t,
          }
          # Surface partially-rejected batches (too old / too new events)
          # at warn level when log_rejected_request is enabled.
          if response.rejected_log_events_info != nil && @log_rejected_request
            log.warn response.rejected_log_events_info
            log.warn "Called PutLogEvents API", request
          else
            log.debug "Called PutLogEvents API", request
          end
        rescue Aws::CloudWatchLogs::Errors::InvalidSequenceTokenException, Aws::CloudWatchLogs::Errors::DataAlreadyAcceptedException => err
          sleep 1 # to avoid too many API calls
          store_next_sequence_token(group_name, stream_name, err.expected_sequence_token)
          # NOTE(review): "new_sequence_token" logs the pre-update token here;
          # the token actually adopted is err.expected_sequence_token.
          log.warn "updating upload sequence token forcefully because unrecoverable error occured", {
            "error" => err,
            "log_group" => group_name,
            "log_stream" => stream_name,
            "new_sequence_token" => token,
          }
          retry_count += 1
        rescue Aws::CloudWatchLogs::Errors::ResourceNotFoundException => err
          # Only the missing-stream variant is recoverable by recreating the
          # stream; a missing group (or other cause) is re-raised.
          if @auto_create_stream && err.message == 'The specified log stream does not exist.'
            log.warn 'Creating log stream because "The specified log stream does not exist." error is got', {
              "error" => err,
              "log_group" => group_name,
              "log_stream" => stream_name,
            }
            create_log_stream(group_name, stream_name)
            delete_sequence_token(group_name, stream_name)
            retry_count += 1
          else
            raise err
          end
        rescue Aws::CloudWatchLogs::Errors::ThrottlingException => err
          if @put_log_events_retry_limit < 1
            log.warn "failed to PutLogEvents and discard logs because put_log_events_retry_limit is less than 1", {
              "error_class" => err.class.to_s,
              "error" => err.message,
            }
            return
          elsif !@put_log_events_disable_retry_limit && @put_log_events_retry_limit < retry_count
            log.error "failed to PutLogEvents and discard logs because retry count exceeded put_log_events_retry_limit", {
              "error_class" => err.class.to_s,
              "error" => err.message,
            }
            return
          else
            # Exponential backoff with +/-12.5% jitter.
            sleep_sec = @put_log_events_retry_wait * (2 ** retry_count)
            sleep_sec += sleep_sec * (0.25 * (rand - 0.5))
            log.warn "failed to PutLogEvents", {
              "next_retry" => Time.now + sleep_sec,
              "error_class" => err.class.to_s,
              "error" => err.message,
            }
            sleep(sleep_sec)
            retry_count += 1
          end
        end
      end

      if 0 < retry_count
        log.warn "retry succeeded"
      end

      store_next_sequence_token(group_name, stream_name, response.next_sequence_token)
    end
493
+
494
+ def create_log_group(group_name, log_group_aws_tags = nil, retention_in_days = nil)
495
+ begin
496
+ @logs.create_log_group(log_group_name: group_name, tags: log_group_aws_tags)
497
+ unless retention_in_days.nil?
498
+ put_retention_policy(group_name, retention_in_days)
499
+ end
500
+ @sequence_tokens[group_name] = {}
501
+ rescue Aws::CloudWatchLogs::Errors::ResourceAlreadyExistsException
502
+ log.debug "Log group '#{group_name}' already exists"
503
+ end
504
+ end
505
+
506
+ def put_retention_policy(group_name, retention_in_days)
507
+ begin
508
+ @logs.put_retention_policy({
509
+ log_group_name: group_name,
510
+ retention_in_days: retention_in_days
511
+ })
512
+ rescue Aws::CloudWatchLogs::Errors::InvalidParameterException => error
513
+ log.warn "failed to set retention policy for Log group '#{group_name}' with error #{error.backtrace}"
514
+ end
515
+ end
516
+
517
+ def create_log_stream(group_name, stream_name)
518
+ begin
519
+ @logs.create_log_stream(log_group_name: group_name, log_stream_name: stream_name)
520
+ @sequence_tokens[group_name] ||= {}
521
+ @sequence_tokens[group_name][stream_name] = nil
522
+ rescue Aws::CloudWatchLogs::Errors::ResourceAlreadyExistsException
523
+ log.debug "Log stream '#{stream_name}' already exists"
524
+ end
525
+ end
526
+
527
+ def log_group_exists?(group_name)
528
+ if @sequence_tokens[group_name]
529
+ true
530
+ elsif check_log_group_existence(group_name)
531
+ @sequence_tokens[group_name] = {}
532
+ true
533
+ else
534
+ false
535
+ end
536
+ end
537
+
538
+ def check_log_group_existence(group_name)
539
+ response = @logs.describe_log_groups(log_group_name_prefix: group_name)
540
+ response.each {|page|
541
+ if page.log_groups.find {|i| i.log_group_name == group_name }
542
+ return true
543
+ end
544
+ }
545
+
546
+ false
547
+ end
548
+
549
+ def log_stream_exists?(group_name, stream_name)
550
+ if not @sequence_tokens[group_name]
551
+ false
552
+ elsif @sequence_tokens[group_name].has_key?(stream_name)
553
+ true
554
+ elsif (log_stream = find_log_stream(group_name, stream_name))
555
+ @sequence_tokens[group_name][stream_name] = log_stream.upload_sequence_token
556
+ true
557
+ else
558
+ false
559
+ end
560
+ end
561
+
562
+ def find_log_stream(group_name, stream_name)
563
+ response = @logs.describe_log_streams(log_group_name: group_name, log_stream_name_prefix: stream_name)
564
+ response.each {|page|
565
+ if (log_stream = page.log_streams.find {|i| i.log_stream_name == stream_name })
566
+ return log_stream
567
+ end
568
+ sleep 0.1
569
+ }
570
+ end
571
+
572
+ nil
573
+ end
574
+ end