fluent-plugin-scalyr-threaded 1.0.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,444 @@
1
+ #
2
+ # Scalyr Output Plugin for Fluentd
3
+ #
4
+ # Copyright (C) 2015 Scalyr, Inc.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+
19
+ require 'fluent/plugin/output'
20
+ require 'fluent/plugin/scalyr-exceptions'
21
+ require 'fluent/plugin_helper/compat_parameters'
22
+ require 'json'
23
+ require 'net/http'
24
+ require 'net/https'
25
+ require 'rbzip2'
26
+ require 'stringio'
27
+ require 'zlib'
28
+ require 'securerandom'
29
+ require 'socket'
30
+ require 'thread'
31
+
32
+ module ScalyrThreaded
33
+ class ScalyrOut < Fluent::Plugin::Output
34
# Register this output under the type name 'scalyr_threaded' and pull in the
# Fluentd plugin helpers used below (compat_parameters_buffer in #configure,
# router.emit_error_event in #build_add_events_body).
Fluent::Plugin.register_output('scalyr_threaded', self)
helpers :compat_parameters, :event_emitter

# Token sent as the "token" field of every addEvents request body.
config_param :api_write_token, :string
# Hash sent as "sessionInfo" with every request; string values of the form
# "#{...}" are evaluated in #configure.
config_param :server_attributes, :hash, default: nil
# When true, #configure adds serverHost => Socket.gethostname to
# server_attributes unless a 'serverHost' key is already present.
config_param :use_hostname_for_serverhost, :bool, default: true
# Base URL of the Scalyr server; "addEvents" is appended to it in #configure.
config_param :scalyr_server, :string, default: "https://agent.scalyr.com/"
# The three ssl_* options are applied in #post_request when ssl_verify_peer is true.
config_param :ssl_ca_bundle_path, :string, default: "/etc/ssl/certs/ca-bundle.crt"
config_param :ssl_verify_peer, :bool, default: true
config_param :ssl_verify_depth, :integer, default: 5
# Name of the record field that #format moves into the "message" field.
config_param :message_field, :string, default: "message"
# Upper bound, in bytes of event JSON, used by #build_add_events_body to split a
# chunk into several requests.
config_param :max_request_buffer, :integer, default: 1024 * 1024
# Encoding name looked up with Encoding.find in #configure and applied to the
# "message" field in #format.
config_param :force_message_encoding, :string, default: nil
# When true and the forced encoding is UTF-8, #format replaces bad byte
# sequences in "message" with "<?>".
config_param :replace_invalid_utf8, :bool, default: false
# Valid options are bz2, deflate or None. Defaults to None.
config_param :compression_type, :string, default: nil
# An int containing the compression level of compression to use, from 1-9. Defaults to 9 (max)
config_param :compression_level, :integer, default: 9
51
+
52
# Plugin-specific defaults for the <buffer> section.
config_section :buffer do
  {
    retry_max_times:    40,          # try a maximum of 40 times before discarding
    retry_max_interval: 30,          # wait a maximum of 30 seconds per retry
    retry_wait:         5,           # wait a minimum of 5 seconds per retry
    flush_interval:     5,           # default flush interval of 5 seconds
    chunk_limit_size:   1024 * 100,  # default chunk size of 100k
    queue_limit_length: 1024         # default queue size of 1024
  }.each do |option, default_value|
    config_set_default option, default_value
  end
end
60
+
61
# Support for Fluentd 0.14.0 style configuration.
#
# @return [String] always the empty string
def compat_parameters_default_chunk_key
  ''
end
65
+
66
# Always answers true; #format in this plugin serialises each event with
# `to_msgpack`.
#
# @return [Boolean] true
def formatted_to_msgpack_binary
  true
end
69
+
70
# Validates the configuration and derives the values used at runtime.
#
# After delegating to the parent implementation this method:
# * warns when the buffer chunk size or max_request_buffer is large enough that
#   Scalyr may reject requests;
# * resolves force_message_encoding into @message_encoding (nil when unset or
#   unknown);
# * resolves "#{...}" expressions in server_attributes;
# * adds serverHost to server_attributes when use_hostname_for_serverhost is set;
# * builds @add_events_uri from scalyr_server.
#
# @param conf the Fluentd configuration element for this <match> block
def configure( conf )
  if conf.elements('buffer').empty?
    $log.warn "Pre 0.14.0 configuration file detected. Please consider updating your configuration file"
  end

  compat_parameters_buffer( conf, default_chunk_key: '' )

  super

  if @buffer.chunk_limit_size > 1024*1024
    $log.warn "Buffer chunk size is greater than 1Mb. This may result in requests being rejected by Scalyr"
  end

  if @max_request_buffer > (1024*1024*3)
    $log.warn "Maximum request buffer > 3Mb. This may result in requests being rejected by Scalyr"
  end

  @message_encoding = nil
  if @force_message_encoding.to_s != ''
    begin
      @message_encoding = Encoding.find( @force_message_encoding )
      $log.debug "Forcing message encoding to '#{@force_message_encoding}'"
    rescue ArgumentError
      $log.warn "No encoding '#{@force_message_encoding}' found. Ignoring"
    end
  end

  # Evaluate any statements in string values of the server_attributes object.
  # Values that are not strings (numbers, booleans, nested structures) are kept
  # unchanged rather than being dropped.
  #
  # NOTE(security): the captured expression is passed to `eval`, i.e. the
  # configuration file can execute arbitrary Ruby inside the Fluentd process.
  # The pattern is anchored to the whole value so that only a value consisting
  # of exactly one "#{...}" expression is ever evaluated.
  if @server_attributes
    @server_attributes = @server_attributes.each_with_object( {} ) do |(key, value), resolved|
      m = value.is_a?( String ) ? /\A\#\{(.*)\}\z/.match( value ) : nil
      resolved[key] = m ? eval( m[1] ) : value
    end
  end

  # See if we should use the hostname as the server_attributes.serverHost.
  if @use_hostname_for_serverhost
    @server_attributes ||= {}

    # Only set serverHost if it doesn't already exist. String keys are used
    # because keys coming from the config file are strings.
    unless @server_attributes.key? 'serverHost'
      @server_attributes['serverHost'] = Socket.gethostname
    end
  end

  # Build a new string instead of appending in place, so a frozen or shared
  # configuration value is never mutated.
  @scalyr_server = "#{@scalyr_server}/" unless @scalyr_server.end_with?( '/' )

  @add_events_uri = URI( @scalyr_server + "addEvents" )

  # The number of flush threads is intentionally not restricted here; timestamp
  # ordering across threads is enforced in #build_add_events_body under @sync.
end
139
+
140
# Initialises the per-instance runtime state once the plugin is started.
#
# Sets up a fresh session id plus the bookkeeping that
# #build_add_events_body reads and updates while holding @sync.
def start
  super
  $log.info "Scalyr Threaded Fluentd Plugin ID - #{plugin_id}"

  # Generate a session id. This runs once for each <match> in fluent.conf that
  # uses scalyr.
  @session = SecureRandom.uuid

  # @sync guards every variable assigned below it.
  @sync = Mutex.new
  @thread_ids = {}      # tag -> thread id
  @next_id = 1          # next thread id to hand out within this session
  @last_timestamp = 0   # timestamp of most recent event, nanoseconds since epoch
end
153
+
154
# Serialises one event for the buffer as the msgpack array
# [tag, seconds, nanoseconds, record].
#
# Before serialising, the method:
# * substitutes Fluent::Engine.now for a nil time and converts Integer/Float
#   times to Fluent::EventTime;
# * moves the configured message_field into "message" (warning if an existing
#   "message" is overwritten);
# * applies the forced message encoding to a String "message". Non-String
#   messages are left untouched, since they have no encoding to force.
#
# @param tag [String] the event tag
# @param time [Fluent::EventTime, Integer, Float, nil] the event time
# @param record [Hash] the event record; modified in place
# @return [String] the msgpack-encoded event
# @raise [JSON::GeneratorError] re-raised after logging the record
def format( tag, time, record )
  time = Fluent::Engine.now if time.nil?

  # Handle timestamps that are not EventTime types.
  if time.is_a?( Integer )
    time = Fluent::EventTime.new( time )
  elsif time.is_a?( Float )
    whole, fraction = time.divmod( 1 ) # integer and decimal components
    time = Fluent::EventTime.new( whole.to_i, (fraction * 10**9).to_i )
  end

  if @message_field != "message" && record.key?( @message_field )
    if record.key? "message"
      $log.warn "Overwriting log record field 'message'. You are seeing this warning because in your fluentd config file you have configured the '#{@message_field}' field to be converted to the 'message' field, but the log record already contains a field called 'message' and this is now being overwritten."
    end
    record["message"] = record.delete( @message_field )
  end

  message = record["message"]
  if @message_encoding && message.is_a?( String )
    if @replace_invalid_utf8 && @message_encoding == Encoding::UTF_8
      # String#encode is a no-op when the string is already tagged UTF-8, so
      # invalid bytes would survive it; scrub replaces whatever is left.
      record["message"] = message.encode("UTF-8", :invalid => :replace, :undef => :replace, :replace => "<?>").force_encoding('UTF-8').scrub("<?>")
    else
      message.force_encoding( @message_encoding )
    end
  end

  [tag, time.sec, time.nsec, record].to_msgpack
rescue JSON::GeneratorError
  $log.warn "Unable to format message due to JSON::GeneratorError. Record is:\n\t#{record.to_s}"
  raise
end
195
+
196
# Called by fluentd when a chunk of log messages is ready.
#
# The chunk is split into one or more addEvents requests which are posted in
# order. SSL errors and 4xx responses are logged and the affected request is
# discarded without retrying; any other error raised while posting propagates
# to the caller.
#
# The 4xx warning includes a sample of the request body. The body starts with
# the API write token, so the token is redacted from the sample before logging.
#
# @param chunk the buffer chunk (responds to #size and #msgpack_each)
# @raise [JSON::GeneratorError] re-raised after logging
def write( chunk )
  $log.debug "Size of chunk is: #{chunk.size}"
  requests = build_add_events_body( chunk )
  $log.debug "Chunk split into #{requests.size} request(s)."

  requests.each_with_index do |request, index|
    $log.debug "Request #{index + 1}/#{requests.size}: #{request[:body].bytesize} bytes"
    begin
      response = post_request( @add_events_uri, request[:body] )
      handle_response( response )
    rescue OpenSSL::SSL::SSLError => e
      if e.message.include? "certificate verify failed"
        $log.warn "SSL certificate verification failed. Please make sure your certificate bundle is configured correctly and points to a valid file. You can configure this with the ssl_ca_bundle_path configuration option. The current value of ssl_ca_bundle_path is '#{@ssl_ca_bundle_path}'"
      end
      $log.warn e.message
      $log.warn "Discarding buffer chunk without retrying or logging to <secondary>"
    rescue ScalyrThreaded::Client4xxError => e
      # Redact the token in the form it takes inside the JSON body (to_json
      # output minus the surrounding quotes), so escaping cannot defeat it.
      sample = request[:body][0,1024]
      encoded_token = @api_write_token.to_s.to_json[1..-2]
      sample = sample.gsub( encoded_token, '<redacted>' ) unless encoded_token.empty?

      $log.warn "4XX status code received for request #{index + 1}/#{requests.size}. Discarding buffer without retrying or logging.\n\t#{response.code} - #{e.message}\n\tChunk Size: #{chunk.size}\n\tLog messages this request: #{request[:record_count]}\n\tJSON payload size: #{request[:body].bytesize}\n\tSample: #{sample}..."
    end
  end
rescue JSON::GeneratorError
  $log.warn "Unable to format message due to JSON::GeneratorError."
  raise
end
225
+
226
+
227
# Combines a seconds value and a nanoseconds remainder into a single
# nanosecond count. Kept as its own method so the conversion lives in one
# place if/when fluentd supports higher than second resolutions.
#
# @param seconds [Integer] whole seconds
# @param nsec [Integer] additional nanoseconds
# @return [Integer] seconds * 10^9 + nsec
def to_nanos( seconds, nsec )
  nanos_per_second = 1_000_000_000
  seconds * nanos_per_second + nsec
end
232
+
233
# Converts a timestamp object into whole milliseconds. Kept as its own method
# so the conversion lives in one place if/when fluentd supports higher than
# second resolutions.
#
# @param timestamp an object responding to #sec and #nsec
# @return [Integer] sec * 1000 plus nsec truncated to milliseconds
def to_millis( timestamp )
  from_seconds = timestamp.sec * 1_000
  from_nanos = timestamp.nsec / 1_000_000
  from_seconds + from_nanos
end
238
+
239
# Posts a JSON body to the given URI over TLS and returns the HTTP response.
#
# When compression_type is 'deflate' or 'bz2' the body is compressed and a
# matching Content-Encoding header is sent. Any other compression_type value
# (for example "None") sends the body uncompressed and, unlike before, no
# Content-Encoding header at all.
#
# @param uri [URI] target URI; host, port and path are used
# @param body [String] the JSON request body
# @return the response returned by Net::HTTP#request
def post_request( uri, body )
  https = Net::HTTP.new( uri.host, uri.port )
  https.use_ssl = true

  # Verify peers to prevent potential MITM attacks.
  if @ssl_verify_peer
    https.ca_file = @ssl_ca_bundle_path
    https.verify_mode = OpenSSL::SSL::VERIFY_PEER
    https.verify_depth = @ssl_verify_depth
  end

  # encoding stays nil unless a supported compression was actually applied.
  encoding = nil
  case @compression_type
  when 'deflate'
    encoding = 'deflate'
    body = Zlib::Deflate.deflate( body, @compression_level )
  when 'bz2'
    encoding = 'bz2'
    io = StringIO.new
    bz2 = RBzip2.default_adapter::Compressor.new io
    bz2.write body
    bz2.close
    body = io.string
  end

  post = Net::HTTP::Post.new uri.path
  post.add_field( 'Content-Type', 'application/json' )
  post.add_field( 'Content-Encoding', encoding ) if encoding
  post.body = body

  https.request( post )
end
280
+
281
# Inspects an addEvents response and raises when the request did not succeed.
#
# The body is parsed as JSON. A body that cannot be parsed, or that parses to
# something other than a JSON object (e.g. null, an array, a bare string), is
# treated as the status "Invalid JSON response from server".
#
# @param response an object responding to #code (String) and #body (String)
# @raise [ScalyrThreaded::Client4xxError] for any 4xx response code
# @raise [ScalyrThreaded::ClientError] for a non-success status containing "/client/"
# @raise [ScalyrThreaded::ServerError] for a missing status, any other
#   non-success status, or a success status with a non-200 response code
def handle_response( response )
  $log.debug "Response Code: #{response.code}"
  $log.debug "Response Body: #{response.body}"

  response_hash =
    begin
      JSON.parse( response.body )
    rescue StandardError
      nil
    end

  unless response_hash.is_a?( Hash )
    response_hash = { "status" => "Invalid JSON response from server" }
  end

  # Make sure the JSON response has a "status" field.
  unless response_hash.key? "status"
    $log.debug "JSON response does not contain status message"
    raise ScalyrThreaded::ServerError.new "JSON response does not contain status message"
  end

  status = response_hash["status"]

  # 4xx codes are handled separately.
  raise ScalyrThreaded::Client4xxError.new status if response.code =~ /^4\d\d/

  if status != "success"
    if status =~ /discardBuffer/
      $log.warn "Received 'discardBuffer' message from server. Buffer dropped."
    elsif status =~ %r"/client/"i
      raise ScalyrThreaded::ClientError.new status
    else
      # Don't check specifically for server; all non-client errors are assumed
      # to be server errors.
      raise ScalyrThreaded::ServerError.new status
    end
  elsif !response.code.include? "200" # response code is a string, not an int
    raise ScalyrThreaded::ServerError.new "Unexpected response code #{response.code}"
  end
end
319
+
320
# Converts a buffer chunk into one or more addEvents requests.
#
# Each record becomes an event { :thread, :ts, :attrs }. Timestamps are forced
# to be strictly increasing across the whole plugin instance, and every tag is
# assigned a stable thread id; both are done while holding @sync. Events are
# packed into a request until adding the next one would push the accumulated
# event JSON over max_request_buffer, at which point a request is emitted and
# a new one started. A single event larger than the limit is sent on its own.
#
# The thread list handed to #create_request contains exactly the tags of the
# events in that request.
#
# If an event cannot be serialised to JSON, the original record is emitted as
# an error event and every String attribute is re-encoded to valid UTF-8 (bad
# bytes become "<?>") before serialising again. Non-String attributes are left
# as they are.
#
# @param chunk an object whose #msgpack_each yields [tag, sec, nsec, record]
# @return [Array<Hash>] the requests built by #create_request
def build_add_events_body( chunk )
  requests = []
  current_threads = {}   # tag -> thread id, for the request being assembled
  total_bytes = 0        # JSON bytes of the events collected so far
  events = []

  chunk.msgpack_each do |(tag, sec, nsec, record)|
    timestamp = to_nanos( sec, nsec )
    thread_id = 0

    @sync.synchronize do
      # Ensure timestamp is at least 1 nanosecond greater than the last one.
      timestamp = [timestamp, @last_timestamp + 1].max
      @last_timestamp = timestamp

      # Get the thread id, or allocate one if this tag hasn't been seen before.
      unless @thread_ids.key? tag
        @thread_ids[tag] = @next_id
        @next_id += 1
      end
      thread_id = @thread_ids[tag]
    end

    # Add a logfile field if one doesn't exist.
    record["logfile"] = "/fluentd/#{tag}" unless record.key? "logfile"

    event = { :thread => thread_id.to_s,
              :ts => timestamp,
              :attrs => record
            }

    # The JSON form of the event is only used to track how many bytes are sent.
    begin
      event_json = event.to_json
    rescue JSON::GeneratorError, Encoding::UndefinedConversionError => e
      $log.warn "#{e.class}: #{e.message}"

      # Send the faulty event to a label @ERROR block so it can be handled
      # there (output to an exceptions file, for example).
      time = Fluent::EventTime.new( sec, nsec )
      router.emit_error_event( tag, time, record, e )

      record.each do |key, value|
        # Only strings carry an encoding; numbers, booleans etc. are kept as is.
        next unless value.is_a?( String )
        $log.debug "\t#{key} (#{value.encoding.name}): '#{value}'"
        # encode is a no-op for strings already tagged UTF-8, so scrub is
        # needed to replace invalid bytes in those.
        record[key] = value.encode("UTF-8", :invalid => :replace, :undef => :replace, :replace => "<?>").scrub("<?>")
      end
      event_json = event.to_json
    end

    # Start a new request if this event would exceed the maximum request size.
    append_event = true
    if total_bytes + event_json.bytesize > @max_request_buffer
      # Make sure a request always has at least one event.
      if events.empty?
        events << event
        current_threads[tag] = thread_id
        append_event = false
      end
      requests << create_request( events, current_threads )

      total_bytes = 0
      current_threads = {}
      events = []
    end

    # If the event wasn't consumed above, it belongs to the request currently
    # being assembled; record its thread there as well.
    if append_event
      events << event
      current_threads[tag] = thread_id
      total_bytes += event_json.bytesize
    end
  end

  # Create a final request with any left over events. An empty trailing request
  # is skipped unless it would be the only request for this chunk.
  requests << create_request( events, current_threads ) unless events.empty? && !requests.empty?

  requests
end
416
+
417
# Builds the JSON body of a single addEvents request.
#
# @param events [Array<Hash>] the events to send
# @param current_threads [Hash] tag => thread id for the threads to declare
# @return [Hash] { :body => JSON string, :record_count => number of events }
def create_request( events, current_threads )
  # One Scalyr thread object per tag.
  threads = current_threads.map do |tag, id|
    { :id => id.to_s, :name => "Fluentd: #{tag}" }
  end

  client_timestamp = to_millis( Fluent::Engine.now ).to_s

  payload = {
    :token => @api_write_token,
    :client_timestamp => client_timestamp,
    :session => @session,
    :events => events,
    :threads => threads
  }

  # server_attributes is optional; include it only when configured.
  payload[:sessionInfo] = @server_attributes if @server_attributes

  { :body => payload.to_json, :record_count => events.size }
end
442
+
443
+ end
444
+ end
@@ -0,0 +1,24 @@
1
+ #
2
+ # Scalyr Output Plugin for Fluentd
3
+ #
4
+ # Copyright (C) 2015 Scalyr, Inc.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+
19
+
20
# Error types used by the Scalyr output plugin to classify failed requests.
module ScalyrThreaded
  # A client-side failure.
  class ClientError < StandardError
  end

  # A failure associated with a 4xx HTTP response.
  class Client4xxError < StandardError
  end

  # A server-side failure.
  class ServerError < StandardError
  end
end