instrumental_agent 2.0.0 → 3.0.0.beta3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 7fec654f621fd1e8f11f79fca789690d2e3d9b52
4
- data.tar.gz: 078bf7a001fab6ed9f982cf818e675def3aaf65f
2
+ SHA256:
3
+ metadata.gz: 20bc1dfd989a1d555912b21cd1a43b0a2ebb235e411fed6b27746cd3c01ce2ad
4
+ data.tar.gz: 7e725c0d514db6cccba3827a76a7b73995b06084a54bf0637cf7c2423d2beb3b
5
5
  SHA512:
6
- metadata.gz: 4d2b8bdfa044822f00fb9b682817d2bc1c9108e26b544b93ee218212d7ce3b41ff88c0c422de21300e35638b7490f5a5032096d469ab322d1ab3f0c369012c63
7
- data.tar.gz: 8a3494eb5e9c5932d54a7a2192d28ac181963c5d4f98567ca5cf54aa3381582eb2bed88439df664dd88a6fd4c0689c37e5ec8489e82d0fc15fffd58702cc04a8
6
+ metadata.gz: 356538412bb4aeeb4af6dbb912de961ca587b08410a48cc60b8246e7d138909dab5f75261dd441bf4fd9287868565c2a4554a313c0b9d43316ca7ac615f38b20
7
+ data.tar.gz: f819635e2b8ea0efaa7a25ed62cdf52d54b602e8b8703a7eea3b7975f83ed557ffe0cdd1a39b5dd97942492e3a872d4e343c6c179de18feb3dbb257c08faf1a8
@@ -1 +1 @@
1
- 2.0.0-p648
1
+ 2.6.3
@@ -1,8 +1,7 @@
1
1
  sudo: false
2
2
  language: ruby
3
3
  rvm:
4
- - 2.0.0-p648
5
- - 2.1.5
6
- - 2.2.3
7
- - 2.3.0
8
- - 2.4.0
4
+ - 2.5.7
5
+ - 2.6.3
6
+ - 2.6.6
7
+ - 2.7.1
@@ -1,3 +1,18 @@
1
+ ### 3.0.0.beta [October 8, 2020]
2
+ * Drop support for outdated versions of Ruby
3
+ * Explicitly add support for new versions of Ruby
4
+ * Add support for client-side aggregation
5
+ * Note: the agent API has NOT changed. This is a major release because of the significant changes in Ruby versions officially supported.
6
+
7
+ ### 3.0.0.alpha [August 22, 2019]
8
+ * Drop support for outdated versions of Ruby
9
+ * Explicitly add support for new versions of Ruby
10
+ * Better handling of SSL errors when connecting to Instrumental
11
+ * Note: the agent API has NOT changed. This is a major release because of the significant changes in Ruby versions officially supported.
12
+
13
+ ### 2.1.0 [January 19, 2018]
14
+ * Add support for capistrano 3
15
+
1
16
  ### 2.0.0 [August 21, 2017]
2
17
  * Add automatic tracking of common application metrics, official release
3
18
 
data/Gemfile CHANGED
@@ -1,11 +1,6 @@
1
1
  source "https://rubygems.org"
2
2
 
3
3
  gemspec
4
- ruby_engine = defined?(RUBY_ENGINE) && RUBY_ENGINE
5
- if RUBY_VERSION < "1.9" && !%w{jruby rbx}.include?(ruby_engine)
6
- # Built and installed via ext/mkrf_conf.rb
7
- gem 'system_timer', '~> 1.2'
8
- end
9
4
 
10
5
  # fixes 2.3.0 ffi bundle error
11
- gem 'ffi', '~> 1.0.11'
6
+ gem 'ffi', '~> 1.0.11'
data/README.md CHANGED
@@ -59,6 +59,17 @@ User.find_each do |user|
59
59
  end
60
60
  ```
61
61
 
62
+ ## Aggregation
63
+ Aggregation collects more data on your system before sending it to Instrumental. This reduces the total amount of data being sent, at the cost of a small amount of additional latency. You can control this feature with the frequency parameter:
64
+
65
+ ```ruby
66
+ I = Instrumental::Agent.new('PROJECT_API_TOKEN', :frequency => 15) # send data every 15 seconds
67
+ I.frequency = 6 # send batches of data every 6 seconds
68
+ ```
69
+
70
+ The agent may send data more frequently if you are sending a large number of different metrics. Values between 3 and 15 are generally reasonable. If you want to disable this behavior and send every metric as fast as possible, set frequency to zero or nil. Note that a frequency of zero will still use a separate thread for performance - it is NOT the same as synchronous mode.
71
+
72
+
62
73
  ## Server Metrics
63
74
 
64
75
  Want server stats like load, memory, etc.? Check out [InstrumentalD](https://github.com/instrumental/instrumentald).
@@ -109,6 +120,17 @@ I = Instrumental::Agent.new('PROJECT_API_TOKEN',
109
120
  )
110
121
  ```
111
122
 
123
+ ### Upgrading from 2.x
124
+
125
+ Agent version 3.x drops support for some older rubies, but should otherwise be a drop-in replacement. If you wish to enable Aggregation, enable the agent with the frequency option set to the number of seconds you would like to wait between flushes. For example:
126
+
127
+ ```
128
+ I = Instrumental::Agent.new('PROJECT_API_TOKEN',
129
+ :enabled => Rails.env.production?,
130
+ :frequency => 15
131
+ )
132
+ ```
133
+
112
134
  ## Troubleshooting & Help
113
135
 
114
136
  We are here to help. Email us at [support@instrumentalapp.com](mailto:support@instrumentalapp.com).
@@ -4,13 +4,13 @@ require "instrumental/version"
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "instrumental_agent"
6
6
  s.version = Instrumental::VERSION
7
- s.authors = ["Elijah Miller", "Christopher Zelenak", "Kristopher Chambers", "Matthew Hassfurder"]
7
+ s.authors = ["Expected Behavior"]
8
8
  s.email = ["support@instrumentalapp.com"]
9
9
  s.homepage = "http://github.com/instrumental/instrumental_agent-ruby"
10
10
  s.summary = %q{Custom metric monitoring for Ruby applications via Instrumental}
11
11
  s.description = %q{This agent supports Instrumental custom metric monitoring for Ruby applications. It provides high-data reliability at high scale, without ever blocking your process or causing an exception.}
12
12
  s.license = "MIT"
13
- s.required_ruby_version = '>= 2.0.0'
13
+ s.required_ruby_version = '>= 2.5.7'
14
14
 
15
15
  s.files = `git ls-files`.split("\n")
16
16
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -1,5 +1,7 @@
1
1
  require 'instrumental/version'
2
2
  require 'instrumental/system_timer'
3
+ require 'instrumental/command_structs'
4
+ require 'instrumental/event_aggregator'
3
5
  require 'logger'
4
6
  require 'openssl' rescue nil
5
7
  require 'resolv'
@@ -15,14 +17,17 @@ module Instrumental
15
17
  EXIT_FLUSH_TIMEOUT = 5
16
18
  HOSTNAME = Socket.gethostbyname(Socket.gethostname).first rescue Socket.gethostname
17
19
  MAX_BUFFER = 5000
20
+ MAX_AGGREGATOR_SIZE = 5000
18
21
  MAX_RECONNECT_DELAY = 15
19
22
  REPLY_TIMEOUT = 10
20
23
  RESOLUTION_FAILURES_BEFORE_WAITING = 3
21
24
  RESOLUTION_WAIT = 30
22
25
  RESOLVE_TIMEOUT = 1
26
+ DEFAULT_FREQUENCY = 0
27
+ VALID_FREQUENCIES = [0, 1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30, 60]
23
28
 
24
29
 
25
- attr_accessor :host, :port, :synchronous, :queue, :dns_resolutions, :last_connect_at
30
+ attr_accessor :host, :port, :synchronous, :frequency, :sender_queue, :aggregator_queue, :dns_resolutions, :last_connect_at
26
31
  attr_reader :connection, :enabled, :secure
27
32
 
28
33
  def self.logger=(l)
@@ -52,6 +57,7 @@ module Instrumental
52
57
  # port: 8001
53
58
  # enabled: true
54
59
  # synchronous: false
60
+ # frequency: 10
55
61
  # secure: true
56
62
  # verify: true
57
63
  @api_key = api_key
@@ -73,14 +79,23 @@ module Instrumental
73
79
  @port = (@port || default_port).to_i
74
80
  @enabled = options.has_key?(:enabled) ? !!options[:enabled] : true
75
81
  @synchronous = !!options[:synchronous]
82
+
83
+ if options.has_key?(:frequency)
84
+ self.frequency = options[:frequency]
85
+ else
86
+ self.frequency = DEFAULT_FREQUENCY
87
+ end
88
+
89
+ @metrician = options[:metrician].nil? ? true : !!options[:metrician]
76
90
  @pid = Process.pid
77
91
  @allow_reconnect = true
78
- @certs = certificates
79
92
  @dns_resolutions = 0
80
93
  @last_connect_at = 0
81
- @metrician = options[:metrician].nil? ? true : !!options[:metrician]
94
+
82
95
  @start_worker_mutex = Mutex.new
83
- @queue = Queue.new
96
+ @aggregator_queue = Queue.new
97
+ @sender_queue = Queue.new
98
+
84
99
 
85
100
  setup_cleanup_at_exit if @enabled
86
101
 
@@ -94,7 +109,9 @@ module Instrumental
94
109
  # agent.gauge('load', 1.23)
95
110
  def gauge(metric, value, time = Time.now, count = 1)
96
111
  if valid?(metric, value, time, count) &&
97
- send_command("gauge", metric, value, time.to_i, count.to_i)
112
+ send_command(Instrumental::Command.new("gauge".freeze, metric, value, time, count))
113
+ # tempted to "gauge" this to a symbol? Don't. Frozen strings are very fast,
114
+ # and later we're going to to_s every one of these anyway.
98
115
  value
99
116
  else
100
117
  nil
@@ -142,7 +159,7 @@ module Instrumental
142
159
  # agent.increment('users')
143
160
  def increment(metric, value = 1, time = Time.now, count = 1)
144
161
  if valid?(metric, value, time, count) &&
145
- send_command("increment", metric, value, time.to_i, count.to_i)
162
+ send_command(Instrumental::Command.new("increment".freeze, metric, value, time, count))
146
163
  value
147
164
  else
148
165
  nil
@@ -157,7 +174,7 @@ module Instrumental
157
174
  # agent.notice('A notice')
158
175
  def notice(note, time = Time.now, duration = 0)
159
176
  if valid_note?(note)
160
- send_command("notice", time.to_i, duration.to_i, note)
177
+ send_command(Instrumental::Notice.new(note, time, duration))
161
178
  note
162
179
  else
163
180
  nil
@@ -196,6 +213,22 @@ module Instrumental
196
213
  @logger || self.class.logger
197
214
  end
198
215
 
216
+ def frequency=(frequency)
217
+ freq = frequency.to_i
218
+ if !VALID_FREQUENCIES.include?(freq)
219
+ logger.warn "Frequency must be a value that divides evenly into 60: 1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30, or 60."
220
+ # this will make all negative numbers and nils into 0s
221
+ freq = VALID_FREQUENCIES.select{ |f| f < freq }.max.to_i
222
+ end
223
+
224
+ @frequency = if(@synchronous)
225
+ logger.warn "Synchronous and Frequency should not be enabled at the same time! Defaulting to synchronous mode."
226
+ 0
227
+ else
228
+ freq
229
+ end
230
+ end
231
+
199
232
  # Stopping the agent will immediately stop all communication
200
233
  # to Instrumental. If you call this and submit another metric,
201
234
  # the agent will start again.
@@ -207,12 +240,19 @@ module Instrumental
207
240
  #
208
241
  def stop
209
242
  disconnect
210
- if @thread
211
- @thread.kill
212
- @thread = nil
243
+ if @sender_thread
244
+ @sender_thread.kill
245
+ @sender_thread = nil
246
+ end
247
+ if @aggregator_thread
248
+ @aggregator_thread.kill
249
+ @aggregator_thread = nil
213
250
  end
214
- if @queue
215
- @queue.clear
251
+ if @sender_queue
252
+ @sender_queue.clear
253
+ end
254
+ if @aggregator_queue
255
+ @aggregator_queue.clear
216
256
  end
217
257
  end
218
258
 
@@ -222,18 +262,25 @@ module Instrumental
222
262
  # where at_exit is bypassed like Resque workers.
223
263
  def cleanup
224
264
  if running?
225
- logger.info "Cleaning up agent, queue size: #{@queue.size}, thread running: #{@thread.alive?}"
265
+ logger.info "Cleaning up agent, aggregator_size: #{@aggregator_queue.size}, thread_running: #{@aggregator_thread.alive?}"
266
+ logger.info "Cleaning up agent, queue size: #{@sender_queue.size}, thread running: #{@sender_thread.alive?}"
226
267
  @allow_reconnect = false
227
- if @queue.size > 0
228
- queue_message('exit')
229
- begin
230
- with_timeout(EXIT_FLUSH_TIMEOUT) { @thread.join }
231
- rescue Timeout::Error
232
- if @queue.size > 0
233
- logger.error "Timed out working agent thread on exit, dropping #{@queue.size} metrics"
234
- else
235
- logger.error "Timed out Instrumental Agent, exiting"
236
- end
268
+ begin
269
+ with_timeout(EXIT_FLUSH_TIMEOUT) do
270
+ @aggregator_queue << ['exit']
271
+ @aggregator_thread.join
272
+ @sender_queue << ['exit']
273
+ @sender_thread.join
274
+ end
275
+ rescue Timeout::Error
276
+ total_size = @sender_queue&.size.to_i +
277
+ @aggregator_queue&.size.to_i +
278
+ @event_aggregator&.size.to_i
279
+
280
+ if total_size > 0
281
+ logger.error "Timed out working agent thread on exit, dropping #{total_size} metrics"
282
+ else
283
+ logger.error "Timed out Instrumental Agent, exiting"
237
284
  end
238
285
  end
239
286
  end
@@ -271,7 +318,8 @@ module Instrumental
271
318
  end
272
319
 
273
320
  def report_exception(e)
274
- logger.error "Exception occurred: #{e.message}\n#{e.backtrace.join("\n")}"
321
+ # puts "--- Exception of type #{e.class} occurred:\n#{e.message}\n#{e.backtrace.join("\n")}"
322
+ logger.error "Exception of type #{e.class} occurred:\n#{e.message}\n#{e.backtrace.join("\n")}"
275
323
  end
276
324
 
277
325
  def ipv4_address_for_host(host, port, moment_to_connect = Time.now.to_i)
@@ -291,44 +339,41 @@ module Instrumental
291
339
  nil
292
340
  end
293
341
 
294
- def send_command(cmd, *args)
295
- cmd = "%s %s\n" % [cmd, args.collect { |a| a.to_s }.join(" ")]
296
- if enabled?
297
-
298
- start_connection_worker
299
- if @queue && @queue.size < MAX_BUFFER
300
- @queue_full_warning = false
301
- logger.debug "Queueing: #{cmd.chomp}"
302
- queue_message(cmd, { :synchronous => @synchronous })
303
- else
304
- if !@queue_full_warning
305
- @queue_full_warning = true
306
- logger.warn "Queue full(#{@queue.size}), dropping commands..."
307
- end
308
- logger.debug "Dropping command, queue full(#{@queue.size}): #{cmd.chomp}"
309
- nil
310
- end
342
+ def send_command(command)
343
+ return logger.debug(command.to_s) unless enabled?
344
+ start_workers
345
+ critical_queue = frequency.to_i == 0 ? @sender_queue : @aggregator_queue
346
+ if critical_queue && critical_queue.size < MAX_BUFFER
347
+ @queue_full_warning = false
348
+ logger.debug "Queueing: #{command.to_s}"
349
+ queue_message(command, { :synchronous => @synchronous })
311
350
  else
312
- logger.debug cmd.strip
351
+ if !@queue_full_warning
352
+ @queue_full_warning = true
353
+ logger.warn "Queue full(#{critical_queue.size}), dropping commands..."
354
+ end
355
+ logger.debug "Dropping command, queue full(#{critical_queue.size}): #{command.to_s}"
356
+ nil
313
357
  end
314
358
  end
315
359
 
316
360
  def queue_message(message, options = {})
317
- if @enabled
318
- options ||= {}
319
- if options[:allow_reconnect].nil?
320
- options[:allow_reconnect] = @allow_reconnect
321
- end
322
- synchronous = options.delete(:synchronous)
323
- if synchronous
324
- options[:sync_resource] ||= ConditionVariable.new
325
- @sync_mutex.synchronize {
326
- @queue << [message, options]
327
- options[:sync_resource].wait(@sync_mutex)
328
- }
329
- else
330
- @queue << [message, options]
331
- end
361
+ return message unless enabled?
362
+
363
+ # imagine it's a reverse merge, but with fewer allocations
364
+ options[:allow_reconnect] = @allow_reconnect unless options.has_key?(:allow_reconnect)
365
+
366
+ if options.delete(:synchronous)
367
+ options[:sync_resource] ||= ConditionVariable.new
368
+ @sync_mutex.synchronize {
369
+ queue = message == "flush" ? @aggregator_queue : @sender_queue
370
+ queue << [message, options]
371
+ options[:sync_resource].wait(@sync_mutex)
372
+ }
373
+ elsif frequency.to_i == 0
374
+ @sender_queue << [message, options]
375
+ else
376
+ @aggregator_queue << [message, options]
332
377
  end
333
378
  message
334
379
  end
@@ -350,31 +395,15 @@ module Instrumental
350
395
 
351
396
  def test_connection
352
397
  begin
353
- # In the case where the socket is an OpenSSL::SSL::SSLSocket,
354
- # on Ruby 1.8.6, 1.8.7 or 1.9.1, read_nonblock does not exist,
355
- # and so the case of testing socket liveliness via a nonblocking
356
- # read that catches a wait condition won't work.
357
- #
358
- # We grab the SSL socket's underlying IO object and perform the
359
- # non blocking read there in order to ensure the socket is still
360
- # valid
361
- if @socket.respond_to?(:read_nonblock)
362
- @socket.read_nonblock(1)
363
- elsif @socket.respond_to?(:io)
364
- # The SSL Socket may send down additional data at close time,
365
- # so we perform two nonblocking reads, one to pull any pending
366
- # data on the socket, and the second to actually perform the connection
367
- # liveliness test
368
- @socket.io.read_nonblock(1024) && @socket.io.read_nonblock(1024)
369
- end
398
+ @socket.read_nonblock(1)
370
399
  rescue *wait_exceptions
371
400
  # noop
372
401
  end
373
402
  end
374
403
 
375
- def start_connection_worker
404
+ def start_workers
376
405
  # NOTE: We need a mutex around both `running?` and thread creation,
377
- # otherwise we could create two threads.
406
+ # otherwise we could create too many threads.
378
407
  # Return early and queue the message if another thread is
379
408
  # starting the worker.
380
409
  return if !@start_worker_mutex.try_lock
@@ -384,13 +413,34 @@ module Instrumental
384
413
  disconnect
385
414
  address = ipv4_address_for_host(@host, @port)
386
415
  if address
387
- @pid = Process.pid
416
+ new_pid = if @pid != Process.pid
417
+ @pid = Process.pid
418
+ true
419
+ else
420
+ false
421
+ end
422
+
388
423
  @sync_mutex = Mutex.new
389
424
  @failures = 0
390
425
  @sockaddr_in = Socket.pack_sockaddr_in(@port, address)
391
- logger.info "Starting thread"
392
- @thread = Thread.new do
393
- run_worker_loop
426
+
427
+ logger.info "Starting aggregator thread"
428
+ if !@aggregator_thread&.alive?
429
+ if new_pid
430
+ @event_aggregator = nil
431
+ @aggregator_queue = Queue.new
432
+ end
433
+ @aggregator_thread = Thread.new do
434
+ run_aggregator_loop
435
+ end
436
+ end
437
+
438
+ if !@sender_thread&.alive?
439
+ logger.info "Starting sender thread"
440
+ @sender_queue = Queue.new if new_pid
441
+ @sender_thread = Thread.new do
442
+ run_sender_loop
443
+ end
394
444
  end
395
445
  end
396
446
  ensure
@@ -426,82 +476,155 @@ module Instrumental
426
476
  sock
427
477
  end
428
478
 
429
- def run_worker_loop
430
- command_and_args = nil
431
- command_options = nil
432
- logger.info "connecting to collector"
433
- with_timeout(CONNECT_TIMEOUT) do
434
- @socket = open_socket(@sockaddr_in, @secure, @verify_cert)
479
+ def run_aggregator_loop
480
+ # if the sender queue is some level of full, should we keep aggregating until it empties out?
481
+ # what does this mean for aggregation slices - aggregating to nearest frequency will
482
+ # make the object needlessly larger, when minute resolution is what we have on the server
483
+ begin
484
+ loop do
485
+ now = Time.now.to_i
486
+ time_to_wait = if frequency == 0
487
+ 0
488
+ else
489
+ next_frequency = (now - (now % frequency)) + frequency
490
+ time_to_wait = [(next_frequency - Time.now.to_f), 0].max
491
+ end
492
+
493
+ command_and_args, command_options = if @event_aggregator&.size.to_i > MAX_AGGREGATOR_SIZE
494
+ logger.info "Aggregator full, flushing early with #{MAX_AGGREGATOR_SIZE} metrics."
495
+ command_and_args, command_options = ['forward', {}]
496
+ else
497
+ begin
498
+ with_timeout(time_to_wait) do
499
+ @aggregator_queue.pop
500
+ end
501
+ rescue Timeout::Error
502
+ ['forward', {}]
503
+ end
504
+ end
505
+ if command_and_args
506
+ case command_and_args
507
+ when 'exit'
508
+ if !@event_aggregator.nil?
509
+ @sender_queue << @event_aggregator
510
+ @event_aggregator = nil
511
+ end
512
+ logger.info "Exiting, #{@aggregator_queue.size} commands remain"
513
+ return true
514
+ when 'flush'
515
+ if !@event_aggregator.nil?
516
+ @sender_queue << @event_aggregator
517
+ @event_aggregator = nil
518
+ end
519
+ @sender_queue << ['flush', command_options]
520
+ when 'forward'
521
+ if !@event_aggregator.nil?
522
+ next if @sender_queue.size > 0 && @sender_queue.num_waiting < 1
523
+ @sender_queue << @event_aggregator
524
+ @event_aggregator = nil
525
+ end
526
+ when Notice
527
+ @sender_queue << [command_and_args, command_options]
528
+ else
529
+ @event_aggregator = EventAggregator.new(frequency: @frequency) if @event_aggregator.nil?
530
+
531
+ logger.debug "Sending: #{command_and_args} to aggregator"
532
+ @event_aggregator.put(command_and_args)
533
+ end
534
+ command_and_args = nil
535
+ command_options = nil
536
+ end
537
+ end
538
+ rescue Exception => err
539
+ report_exception(err)
435
540
  end
436
- logger.info "connected to collector at #{host}:#{port}"
437
- hello_options = {
438
- "version" => "ruby/instrumental_agent/#{VERSION}",
439
- "hostname" => HOSTNAME,
440
- "pid" => Process.pid,
441
- "runtime" => "#{defined?(RUBY_ENGINE) ? RUBY_ENGINE : "ruby"}/#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}",
442
- "platform" => RUBY_PLATFORM
443
- }.to_a.flatten.map { |v| v.to_s.gsub(/\s+/, "_") }.join(" ")
444
-
445
- send_with_reply_timeout "hello #{hello_options}"
446
- send_with_reply_timeout "authenticate #{@api_key}"
541
+ end
542
+
543
+ def run_sender_loop
447
544
  @failures = 0
448
- loop do
449
- command_and_args, command_options = @queue.pop
450
- if command_and_args
451
- sync_resource = command_options && command_options[:sync_resource]
452
- test_connection
453
- case command_and_args
454
- when 'exit'
455
- logger.info "Exiting, #{@queue.size} commands remain"
456
- return true
457
- when 'flush'
458
- release_resource = true
459
- else
460
- logger.debug "Sending: #{command_and_args.chomp}"
461
- @socket.puts command_and_args
462
- end
463
- command_and_args = nil
464
- command_options = nil
465
- if sync_resource
466
- @sync_mutex.synchronize do
467
- sync_resource.signal
545
+ begin
546
+ logger.info "connecting to collector"
547
+ command_and_args = nil
548
+ command_options = nil
549
+ with_timeout(CONNECT_TIMEOUT) do
550
+ @socket = open_socket(@sockaddr_in, @secure, @verify_cert)
551
+ end
552
+ logger.info "connected to collector at #{host}:#{port}"
553
+ hello_options = {
554
+ "version" => "ruby/instrumental_agent/#{VERSION}",
555
+ "hostname" => HOSTNAME,
556
+ "pid" => Process.pid,
557
+ "runtime" => "#{defined?(RUBY_ENGINE) ? RUBY_ENGINE : "ruby"}/#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}",
558
+ "platform" => RUBY_PLATFORM
559
+ }.to_a.flatten.map { |v| v.to_s.gsub(/\s+/, "_") }.join(" ")
560
+
561
+ send_with_reply_timeout "hello #{hello_options}"
562
+ send_with_reply_timeout "authenticate #{@api_key}"
563
+
564
+ loop do
565
+ command_and_args, command_options = @sender_queue.pop
566
+ if command_and_args
567
+ sync_resource = command_options && command_options[:sync_resource]
568
+ test_connection
569
+ case command_and_args
570
+ when 'exit'
571
+ logger.info "Exiting, #{@sender_queue.size} commands remain"
572
+ return true
573
+ when 'flush'
574
+ release_resource = true
575
+ when EventAggregator
576
+ command_and_args.values.values.each do |command|
577
+ logger.debug "Sending: #{command}"
578
+ @socket.puts command
579
+ end
580
+ else
581
+ logger.debug "Sending: #{command_and_args}"
582
+ @socket.puts command_and_args
583
+ end
584
+ command_and_args = nil
585
+ command_options = nil
586
+ if sync_resource
587
+ @sync_mutex.synchronize do
588
+ sync_resource.signal
589
+ end
468
590
  end
469
591
  end
470
592
  end
471
- end
472
- rescue Exception => err
473
- allow_reconnect = @allow_reconnect
474
- case err
475
- when EOFError
593
+ rescue Exception => err
594
+ allow_reconnect = @allow_reconnect
595
+ case err
596
+ when EOFError
476
597
  # nop
477
- when Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::EADDRINUSE, Timeout::Error
478
- # If the connection has been refused by Instrumental
479
- # or we cannot reach the server
480
- # or the connection state of this socket is in a race
481
- logger.error "unable to connect to Instrumental, hanging up with #{@queue.size} messages remaining"
482
- logger.debug "Exception: #{err.inspect}\n#{err.backtrace.join("\n")}"
483
- allow_reconnect = false
484
- else
485
- report_exception(err)
486
- end
487
- if allow_reconnect == false ||
488
- (command_options && command_options[:allow_reconnect] == false)
489
- logger.info "Not trying to reconnect"
490
- @failures = 0
491
- return
492
- end
493
- if command_and_args
494
- logger.debug "requeueing: #{command_and_args}"
495
- @queue << command_and_args
598
+ when Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::EADDRINUSE, Timeout::Error, OpenSSL::SSL::SSLError
599
+ # If the connection has been refused by Instrumental
600
+ # or we cannot reach the server
601
+ # or the connection state of this socket is in a race
602
+ # or SSL is not functioning properly for some reason
603
+ logger.error "unable to connect to Instrumental, hanging up with #{@sender_queue.size} messages remaining"
604
+ logger.debug "Exception: #{err.inspect}\n#{err.backtrace.join("\n")}"
605
+ allow_reconnect = false
606
+ else
607
+ report_exception(err)
608
+ end
609
+ if allow_reconnect == false ||
610
+ (command_options && command_options[:allow_reconnect] == false)
611
+ logger.info "Not trying to reconnect"
612
+ @failures = 0
613
+ return
614
+ end
615
+ if command_and_args
616
+ logger.debug "requeueing: #{command_and_args}"
617
+ @sender_queue << command_and_args
618
+ end
619
+ disconnect
620
+ @failures += 1
621
+ delay = [(@failures - 1) ** BACKOFF, MAX_RECONNECT_DELAY].min
622
+ logger.error "disconnected, #{@failures} failures in a row, reconnect in #{delay}..."
623
+ sleep delay
624
+ retry
625
+ ensure
626
+ disconnect
496
627
  end
497
- disconnect
498
- @failures += 1
499
- delay = [(@failures - 1) ** BACKOFF, MAX_RECONNECT_DELAY].min
500
- logger.error "disconnected, #{@failures} failures in a row, reconnect in #{delay}..."
501
- sleep delay
502
- retry
503
- ensure
504
- disconnect
505
628
  end
506
629
 
507
630
  def setup_cleanup_at_exit
@@ -511,7 +634,11 @@ module Instrumental
511
634
  end
512
635
 
513
636
  def running?
514
- !@thread.nil? && @pid == Process.pid && @thread.alive?
637
+ !@sender_thread.nil? &&
638
+ !@aggregator_thread.nil? &&
639
+ @pid == Process.pid &&
640
+ @sender_thread.alive? &&
641
+ @aggregator_thread.alive?
515
642
  end
516
643
 
517
644
  def flush_socket(socket)
@@ -541,18 +668,5 @@ module Instrumental
541
668
  def allows_secure?
542
669
  defined?(OpenSSL)
543
670
  end
544
-
545
- def certificates
546
- if allows_secure?
547
- base_dir = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."))
548
- %w{equifax geotrust rapidssl}.map do |name|
549
- OpenSSL::X509::Certificate.new(File.open(File.join(base_dir, "certs", "#{name}.ca.pem")))
550
- end
551
- else
552
- []
553
- end
554
- end
555
-
556
671
  end
557
-
558
672
  end