instrumental_agent 2.0.0 → 3.0.0.beta3
This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
- checksums.yaml +5 -5
- data/.ruby-version +1 -1
- data/.travis.yml +4 -5
- data/CHANGELOG.md +15 -0
- data/Gemfile +1 -6
- data/README.md +22 -0
- data/instrumental_agent.gemspec +2 -2
- data/lib/instrumental/agent.rb +277 -163
- data/lib/instrumental/capistrano.rb +4 -46
- data/lib/instrumental/capistrano/capistrano2.rb +47 -0
- data/lib/instrumental/capistrano/capistrano3.rake +56 -0
- data/lib/instrumental/command_structs.rb +32 -0
- data/lib/instrumental/event_aggregator.rb +28 -0
- data/lib/instrumental/version.rb +1 -1
- data/spec/agent_spec.rb +419 -43
- data/spec/command_struct_specs.rb +20 -0
- data/spec/event_aggregator_spec.rb +53 -0
- data/spec/spec_helper.rb +9 -0
- metadata +35 -34
- data/certs/equifax.ca.pem +0 -69
- data/certs/geotrust.ca.pem +0 -80
- data/certs/rapidssl.ca.pem +0 -94
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: 20bc1dfd989a1d555912b21cd1a43b0a2ebb235e411fed6b27746cd3c01ce2ad
+  data.tar.gz: 7e725c0d514db6cccba3827a76a7b73995b06084a54bf0637cf7c2423d2beb3b
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 356538412bb4aeeb4af6dbb912de961ca587b08410a48cc60b8246e7d138909dab5f75261dd441bf4fd9287868565c2a4554a313c0b9d43316ca7ac615f38b20
+  data.tar.gz: f819635e2b8ea0efaa7a25ed62cdf52d54b602e8b8703a7eea3b7975f83ed557ffe0cdd1a39b5dd97942492e3a872d4e343c6c179de18feb3dbb257c08faf1a8
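Side note (not part of the diff): the values above are plain SHA256/SHA512 hex digests of the gem's internal metadata.gz and data.tar.gz archives. A minimal sketch of recomputing one with Ruby's standard library, assuming a hypothetical local path to the extracted file:

```ruby
require "digest"

# Hypothetical path to a file extracted from the downloaded .gem archive.
path = "tmp/instrumental_agent-3.0.0.beta3/data.tar.gz"

puts Digest::SHA256.file(path).hexdigest # compare with the SHA256 entry above
puts Digest::SHA512.file(path).hexdigest # compare with the SHA512 entry above
```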
data/.ruby-version
CHANGED
@@ -1 +1 @@
-2.
+2.6.3
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,18 @@
+### 3.0.0.beta [October 8, 2020]
+* Drop support for outdated versions of Ruby
+* Explicitly add support for new versions of Ruby
+* Add support for client-side aggregation
+* Note: the agent API has NOT changed. This is a major release because of the significant changes in Ruby versions officially supported.
+
+### 3.0.0.alpha [August 22, 2019]
+* Drop support for outdated versions of Ruby
+* Explicitly add support for new versions of Ruby
+* Better handling of SSL errors when connecting to Instrumental
+* Note: the agent API has NOT changed. This is a major release because of the significant changes in Ruby versions officially supported.
+
+### 2.1.0 [January 19, 2018]
+* Add support for capistrano 3
+
 ### 2.0.0 [August 21, 2017]
 * Add automatic tracking of common application metrics, official release
 
data/Gemfile
CHANGED
@@ -1,11 +1,6 @@
 source "https://rubygems.org"
 
 gemspec
-ruby_engine = defined?(RUBY_ENGINE) && RUBY_ENGINE
-if RUBY_VERSION < "1.9" && !%w{jruby rbx}.include?(ruby_engine)
-  # Built and installed via ext/mkrf_conf.rb
-  gem 'system_timer', '~> 1.2'
-end
 
 # fixes 2.3.0 ffi bundle error
-gem 'ffi', '~> 1.0.11'
+gem 'ffi', '~> 1.0.11'
data/README.md
CHANGED
@@ -59,6 +59,17 @@ User.find_each do |user|
 end
 ```
 
+## Aggregation
+Aggregation collects more data on your system before sending it to Instrumental. This reduces the total amount of data being sent, at the cost of a small amount of additional latency. You can control this feature with the frequency parameter:
+
+```ruby
+I = Instrumental::Agent.new('PROJECT_API_TOKEN', :frequency => 15) # send data every 15 seconds
+I.frequency = 6 # send batches of data every 6 seconds
+```
+
+The agent may send data more frequently if you are sending a large number of different metrics. Values between 3 and 15 are generally reasonable. If you want to disable this behavior and send every metric as fast as possible, set frequency to zero or nil. Note that a frequency of zero will still use a seperate thread for performance - it is NOT the same as synchronous mode.
+
+
 ## Server Metrics
 
 Want server stats like load, memory, etc.? Check out [InstrumentalD](https://github.com/instrumental/instrumentald).
@@ -109,6 +120,17 @@ I = Instrumental::Agent.new('PROJECT_API_TOKEN',
 )
 ```
 
+### Upgrading from 2.x
+
+Agent version 3.x drops support for some older rubies, but should otherwise be a drop-in replacement. If you wish to enable Aggregation, enable the agent with the frequency option set to the number of seconds you would like to wait between flushes. For example:
+
+```
+I = Instrumental::Agent.new('PROJECT_API_TOKEN',
+  :enabled => Rails.env.production?,
+  :frequency => 15
+)
+```
+
 ## Troubleshooting & Help
 
 We are here to help. Email us at [support@instrumentalapp.com](mailto:support@instrumentalapp.com).
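Side note (not part of the diff): the aggregation settings documented in the README hunks above can be combined as follows; a short usage sketch based on the 3.x API shown there:

```ruby
require "instrumental_agent"

# Batch metrics client-side and flush them roughly every 15 seconds.
agent = Instrumental::Agent.new("PROJECT_API_TOKEN", :frequency => 15)

agent.increment("signups")
agent.gauge("load", 1.23)

# Disable aggregation: every metric is sent as soon as possible,
# still from a background thread (not the same as :synchronous => true).
agent.frequency = 0
```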
data/instrumental_agent.gemspec
CHANGED
@@ -4,13 +4,13 @@ require "instrumental/version"
 Gem::Specification.new do |s|
   s.name = "instrumental_agent"
   s.version = Instrumental::VERSION
-  s.authors = ["
+  s.authors = ["Expected Behavior"]
   s.email = ["support@instrumentalapp.com"]
   s.homepage = "http://github.com/instrumental/instrumental_agent-ruby"
   s.summary = %q{Custom metric monitoring for Ruby applications via Instrumental}
   s.description = %q{This agent supports Instrumental custom metric monitoring for Ruby applications. It provides high-data reliability at high scale, without ever blocking your process or causing an exception.}
   s.license = "MIT"
-  s.required_ruby_version = '>= 2.
+  s.required_ruby_version = '>= 2.5.7'
 
   s.files = `git ls-files`.split("\n")
   s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
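Side note (not part of the diff): because 3.0.0.beta3 is a prerelease, Bundler will not pick it up from a bare `gem "instrumental_agent"` requirement; a minimal Gemfile sketch pinning the version covered by this diff:

```ruby
source "https://rubygems.org"

# Prerelease versions must be requested explicitly.
gem "instrumental_agent", "3.0.0.beta3"
```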
data/lib/instrumental/agent.rb
CHANGED
@@ -1,5 +1,7 @@
 require 'instrumental/version'
 require 'instrumental/system_timer'
+require 'instrumental/command_structs'
+require 'instrumental/event_aggregator'
 require 'logger'
 require 'openssl' rescue nil
 require 'resolv'
@@ -15,14 +17,17 @@ module Instrumental
     EXIT_FLUSH_TIMEOUT = 5
     HOSTNAME = Socket.gethostbyname(Socket.gethostname).first rescue Socket.gethostname
     MAX_BUFFER = 5000
+    MAX_AGGREGATOR_SIZE = 5000
     MAX_RECONNECT_DELAY = 15
     REPLY_TIMEOUT = 10
     RESOLUTION_FAILURES_BEFORE_WAITING = 3
     RESOLUTION_WAIT = 30
     RESOLVE_TIMEOUT = 1
+    DEFAULT_FREQUENCY = 0
+    VALID_FREQUENCIES = [0, 1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30, 60]
 
 
-    attr_accessor :host, :port, :synchronous, :
+    attr_accessor :host, :port, :synchronous, :frequency, :sender_queue, :aggregator_queue, :dns_resolutions, :last_connect_at
     attr_reader :connection, :enabled, :secure
 
     def self.logger=(l)
@@ -52,6 +57,7 @@ module Instrumental
     # port: 8001
     # enabled: true
     # synchronous: false
+    # frequency: 10
     # secure: true
     # verify: true
       @api_key = api_key
@@ -73,14 +79,23 @@ module Instrumental
       @port = (@port || default_port).to_i
       @enabled = options.has_key?(:enabled) ? !!options[:enabled] : true
       @synchronous = !!options[:synchronous]
+
+      if options.has_key?(:frequency)
+        self.frequency = options[:frequency]
+      else
+        self.frequency = DEFAULT_FREQUENCY
+      end
+
+      @metrician = options[:metrician].nil? ? true : !!options[:metrician]
       @pid = Process.pid
       @allow_reconnect = true
-      @certs = certificates
       @dns_resolutions = 0
       @last_connect_at = 0
-
+
       @start_worker_mutex = Mutex.new
-      @
+      @aggregator_queue = Queue.new
+      @sender_queue = Queue.new
+
 
       setup_cleanup_at_exit if @enabled
 
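Side note (not part of the diff): the new `@metrician` line uses the usual "default true unless explicitly set" idiom, treating a missing or nil option as enabled and coercing anything else with `!!`. A standalone restatement of that pattern (names are illustrative):

```ruby
# Default-true boolean option: only an explicit value disables it.
def metrician_enabled?(options)
  options[:metrician].nil? ? true : !!options[:metrician]
end

metrician_enabled?({})                  # => true  (option omitted)
metrician_enabled?(:metrician => false) # => false (explicitly disabled)
metrician_enabled?(:metrician => "yes") # => true  (any truthy value)
```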
@@ -94,7 +109,9 @@ module Instrumental
     # agent.gauge('load', 1.23)
     def gauge(metric, value, time = Time.now, count = 1)
       if valid?(metric, value, time, count) &&
-
+          send_command(Instrumental::Command.new("gauge".freeze, metric, value, time, count))
+        # tempted to "gauge" this to a symbol? Don't. Frozen strings are very fast,
+        # and later we're going to to_s every one of these anyway.
         value
       else
         nil
@@ -142,7 +159,7 @@ module Instrumental
     # agent.increment('users')
     def increment(metric, value = 1, time = Time.now, count = 1)
       if valid?(metric, value, time, count) &&
-
+          send_command(Instrumental::Command.new("increment".freeze, metric, value, time, count))
         value
       else
         nil
@@ -157,7 +174,7 @@ module Instrumental
     # agent.notice('A notice')
     def notice(note, time = Time.now, duration = 0)
       if valid_note?(note)
-        send_command(
+        send_command(Instrumental::Notice.new(note, time, duration))
         note
       else
         nil
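Side note (not part of the diff): `Instrumental::Command` and `Instrumental::Notice` come from the new `lib/instrumental/command_structs.rb`, whose contents are not included in this excerpt. As a rough, assumed sketch only (not the gem's actual definition), such a value object could be a Struct that serializes itself on `to_s`:

```ruby
module Instrumental
  # Hypothetical approximation -- command_structs.rb is listed in this
  # release but not shown above, so treat this as illustration only.
  Command = Struct.new(:command, :metric, :value, :time, :count) do
    def to_s
      [command, metric, value, time.to_i, count.to_i].join(" ")
    end
  end
end

Instrumental::Command.new("gauge", "load", 1.23, Time.now, 1).to_s
# => "gauge load 1.23 <epoch seconds> 1"
```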
@@ -196,6 +213,22 @@ module Instrumental
       @logger || self.class.logger
     end
 
+    def frequency=(frequency)
+      freq = frequency.to_i
+      if !VALID_FREQUENCIES.include?(freq)
+        logger.warn "Frequency must be a value that divides evenly into 60: 1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30, or 60."
+        # this will make all negative numbers and nils into 0s
+        freq = VALID_FREQUENCIES.select{ |f| f < freq }.max.to_i
+      end
+
+      @frequency = if(@synchronous)
+        logger.warn "Synchronous and Frequency should not be enabled at the same time! Defaulting to synchronous mode."
+        0
+      else
+        freq
+      end
+    end
+
     # Stopping the agent will immediately stop all communication
     # to Instrumental. If you call this and submit another metric,
     # the agent will start again.
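Side note (not part of the diff): the coercion in `frequency=` accepts only even divisors of 60 and otherwise falls back to the next lower valid value, which also maps nil and negative inputs to 0. The same logic restated as a standalone sketch:

```ruby
VALID_FREQUENCIES = [0, 1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30, 60]

def coerce_frequency(value)
  freq = value.to_i
  unless VALID_FREQUENCIES.include?(freq)
    # Fall back to the largest valid value below the requested one;
    # nil.to_i and negative numbers end up as 0.
    freq = VALID_FREQUENCIES.select { |f| f < freq }.max.to_i
  end
  freq
end

coerce_frequency(15)  # => 15
coerce_frequency(7)   # => 6
coerce_frequency(90)  # => 60
coerce_frequency(nil) # => 0
```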
@@ -207,12 +240,19 @@ module Instrumental
     #
     def stop
       disconnect
-      if @
-        @
-        @
+      if @sender_thread
+        @sender_thread.kill
+        @sender_thread = nil
+      end
+      if @aggregator_thread
+        @aggregator_thread.kill
+        @aggregator_thread = nil
       end
-      if @
-        @
+      if @sender_queue
+        @sender_queue.clear
+      end
+      if @aggregator_queue
+        @aggregator_queue.clear
       end
     end
 
@@ -222,18 +262,25 @@ module Instrumental
     # where at_exit is bypassed like Resque workers.
     def cleanup
       if running?
-        logger.info "Cleaning up agent,
+        logger.info "Cleaning up agent, aggregator_size: #{@aggregator_queue.size}, thread_running: #{@aggregator_thread.alive?}"
+        logger.info "Cleaning up agent, queue size: #{@sender_queue.size}, thread running: #{@sender_thread.alive?}"
         @allow_reconnect = false
-
-
-
-
-
-
-
-
-
-
+        begin
+          with_timeout(EXIT_FLUSH_TIMEOUT) do
+            @aggregator_queue << ['exit']
+            @aggregator_thread.join
+            @sender_queue << ['exit']
+            @sender_thread.join
+          end
+        rescue Timeout::Error
+          total_size = @sender_queue&.size.to_i +
+            @aggregator_queue&.size.to_i +
+            @event_aggregator&.size.to_i
+
+          if total_size > 0
+            logger.error "Timed out working agent thread on exit, dropping #{total_size} metrics"
+          else
+            logger.error "Timed out Instrumental Agent, exiting"
          end
        end
      end
@@ -271,7 +318,8 @@ module Instrumental
     end
 
     def report_exception(e)
-
+      # puts "--- Exception of type #{e.class} occurred:\n#{e.message}\n#{e.backtrace.join("\n")}"
+      logger.error "Exception of type #{e.class} occurred:\n#{e.message}\n#{e.backtrace.join("\n")}"
     end
 
     def ipv4_address_for_host(host, port, moment_to_connect = Time.now.to_i)
@@ -291,44 +339,41 @@ module Instrumental
       nil
     end
 
-    def send_command(
-
-
-
-
-
-
-
-        queue_message(cmd, { :synchronous => @synchronous })
-      else
-        if !@queue_full_warning
-          @queue_full_warning = true
-          logger.warn "Queue full(#{@queue.size}), dropping commands..."
-        end
-        logger.debug "Dropping command, queue full(#{@queue.size}): #{cmd.chomp}"
-        nil
-      end
+    def send_command(command)
+      return logger.debug(command.to_s) unless enabled?
+      start_workers
+      critical_queue = frequency.to_i == 0 ? @sender_queue : @aggregator_queue
+      if critical_queue && critical_queue.size < MAX_BUFFER
+        @queue_full_warning = false
+        logger.debug "Queueing: #{command.to_s}"
+        queue_message(command, { :synchronous => @synchronous })
       else
-
+        if !@queue_full_warning
+          @queue_full_warning = true
+          logger.warn "Queue full(#{critical_queue.size}), dropping commands..."
+        end
+        logger.debug "Dropping command, queue full(#{critical_queue.size}): #{command.to_s}"
+        nil
       end
     end
 
     def queue_message(message, options = {})
-
-
-
-
-
-
-
-
-      @
-
-
-
-
-
-
+      return message unless enabled?
+
+      # imagine it's a reverse merge, but with fewer allocations
+      options[:allow_reconnect] = @allow_reconnect unless options.has_key?(:allow_reconnect)
+
+      if options.delete(:synchronous)
+        options[:sync_resource] ||= ConditionVariable.new
+        @sync_mutex.synchronize {
+          queue = message == "flush" ? @aggregator_queue : @sender_queue
+          queue << [message, options]
+          options[:sync_resource].wait(@sync_mutex)
+        }
+      elsif frequency.to_i == 0
+        @sender_queue << [message, options]
+      else
+        @aggregator_queue << [message, options]
       end
       message
     end
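Side note (not part of the diff): the new `send_command`/`queue_message` pair routes work by one rule: with `frequency == 0` commands go straight onto the sender queue, otherwise onto the aggregator queue, and the command is dropped once the target queue holds MAX_BUFFER entries. A condensed sketch of that decision, with local stand-ins for the agent's instance variables:

```ruby
MAX_BUFFER = 5000

sender_queue     = Queue.new
aggregator_queue = Queue.new
frequency        = 15

command = "gauge load 1.23"
target  = frequency.to_i.zero? ? sender_queue : aggregator_queue

if target.size < MAX_BUFFER
  target << command           # normal path: enqueue for the worker thread
else
  warn "queue full, dropping"  # the agent logs a warning and drops the command
end
```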
@@ -350,31 +395,15 @@ module Instrumental
 
     def test_connection
       begin
-
-        # on Ruby 1.8.6, 1.8.7 or 1.9.1, read_nonblock does not exist,
-        # and so the case of testing socket liveliness via a nonblocking
-        # read that catches a wait condition won't work.
-        #
-        # We grab the SSL socket's underlying IO object and perform the
-        # non blocking read there in order to ensure the socket is still
-        # valid
-        if @socket.respond_to?(:read_nonblock)
-          @socket.read_nonblock(1)
-        elsif @socket.respond_to?(:io)
-          # The SSL Socket may send down additional data at close time,
-          # so we perform two nonblocking reads, one to pull any pending
-          # data on the socket, and the second to actually perform the connection
-          # liveliness test
-          @socket.io.read_nonblock(1024) && @socket.io.read_nonblock(1024)
-        end
+        @socket.read_nonblock(1)
       rescue *wait_exceptions
         # noop
       end
     end
 
-    def
+    def start_workers
       # NOTE: We need a mutex around both `running?` and thread creation,
-      # otherwise we could create
+      # otherwise we could create too many threads.
       # Return early and queue the message if another thread is
       # starting the worker.
       return if !@start_worker_mutex.try_lock
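Side note (not part of the diff): the simplified `test_connection` relies on `read_nonblock` existing on every Ruby this release supports. A sketch of the same liveness probe outside the agent, assuming the rescued `wait_exceptions` amount to the usual non-blocking wait errors:

```ruby
def socket_alive?(socket)
  socket.read_nonblock(1)
  true
rescue IO::WaitReadable, IO::WaitWritable
  true  # nothing to read right now, but the connection is still up
rescue EOFError, Errno::ECONNRESET
  false # the peer has closed the connection
end
```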
@@ -384,13 +413,34 @@ module Instrumental
       disconnect
       address = ipv4_address_for_host(@host, @port)
       if address
-        @pid
+        new_pid = if @pid != Process.pid
+          @pid = Process.pid
+          true
+        else
+          false
+        end
+
         @sync_mutex = Mutex.new
         @failures = 0
         @sockaddr_in = Socket.pack_sockaddr_in(@port, address)
-
-
-
+
+        logger.info "Starting aggregator thread"
+        if !@aggregator_thread&.alive?
+          if new_pid
+            @event_aggregator = nil
+            @aggregator_queue = Queue.new
+          end
+          @aggregator_thread = Thread.new do
+            run_aggregator_loop
+          end
+        end
+
+        if !@sender_thread&.alive?
+          logger.info "Starting sender thread"
+          @sender_queue = Queue.new if new_pid
+          @sender_thread = Thread.new do
+            run_sender_loop
+          end
        end
      end
    ensure
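Side note (not part of the diff): `start_workers` detects a fork by comparing the remembered pid with `Process.pid`, because a forked child inherits the queues but not the parent's live threads. A minimal sketch of that fork-safety pattern on its own:

```ruby
class ForkAwareWorker
  def initialize
    @pid = Process.pid
  end

  def ensure_started
    if @pid != Process.pid   # we are running in a forked child
      @pid    = Process.pid
      @queue  = nil          # discard state inherited from the parent
      @thread = nil          # the parent's thread does not exist here
    end
    @queue ||= Queue.new
    @thread = Thread.new { loop { @queue.pop } } unless @thread&.alive?
  end
end
```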
@@ -426,82 +476,155 @@ module Instrumental
       sock
     end
 
-    def
-
-
-
-
-
+    def run_aggregator_loop
+      # if the sender queue is some level of full, should we keep aggregating until it empties out?
+      # what does this mean for aggregation slices - aggregating to nearest frequency will
+      # make the object needlessly larger, when minute resolution is what we have on the server
+      begin
+        loop do
+          now = Time.now.to_i
+          time_to_wait = if frequency == 0
+            0
+          else
+            next_frequency = (now - (now % frequency)) + frequency
+            time_to_wait = [(next_frequency - Time.now.to_f), 0].max
+          end
+
+          command_and_args, command_options = if @event_aggregator&.size.to_i > MAX_AGGREGATOR_SIZE
+            logger.info "Aggregator full, flushing early with #{MAX_AGGREGATOR_SIZE} metrics."
+            command_and_args, command_options = ['forward', {}]
+          else
+            begin
+              with_timeout(time_to_wait) do
+                @aggregator_queue.pop
+              end
+            rescue Timeout::Error
+              ['forward', {}]
+            end
+          end
+          if command_and_args
+            case command_and_args
+            when 'exit'
+              if !@event_aggregator.nil?
+                @sender_queue << @event_aggregator
+                @event_aggregator = nil
+              end
+              logger.info "Exiting, #{@aggregator_queue.size} commands remain"
+              return true
+            when 'flush'
+              if !@event_aggregator.nil?
+                @sender_queue << @event_aggregator
+                @event_aggregator = nil
+              end
+              @sender_queue << ['flush', command_options]
+            when 'forward'
+              if !@event_aggregator.nil?
+                next if @sender_queue.size > 0 && @sender_queue.num_waiting < 1
+                @sender_queue << @event_aggregator
+                @event_aggregator = nil
+              end
+            when Notice
+              @sender_queue << [command_and_args, command_options]
+            else
+              @event_aggregator = EventAggregator.new(frequency: @frequency) if @event_aggregator.nil?
+
+              logger.debug "Sending: #{command_and_args} to aggregator"
+              @event_aggregator.put(command_and_args)
+            end
+            command_and_args = nil
+            command_options = nil
+          end
+        end
+      rescue Exception => err
+        report_exception(err)
       end
-
-
-
-        "hostname" => HOSTNAME,
-        "pid" => Process.pid,
-        "runtime" => "#{defined?(RUBY_ENGINE) ? RUBY_ENGINE : "ruby"}/#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}",
-        "platform" => RUBY_PLATFORM
-      }.to_a.flatten.map { |v| v.to_s.gsub(/\s+/, "_") }.join(" ")
-
-      send_with_reply_timeout "hello #{hello_options}"
-      send_with_reply_timeout "authenticate #{@api_key}"
+    end
+
+    def run_sender_loop
       @failures = 0
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      begin
+        logger.info "connecting to collector"
+        command_and_args = nil
+        command_options = nil
+        with_timeout(CONNECT_TIMEOUT) do
+          @socket = open_socket(@sockaddr_in, @secure, @verify_cert)
+        end
+        logger.info "connected to collector at #{host}:#{port}"
+        hello_options = {
+          "version" => "ruby/instrumental_agent/#{VERSION}",
+          "hostname" => HOSTNAME,
+          "pid" => Process.pid,
+          "runtime" => "#{defined?(RUBY_ENGINE) ? RUBY_ENGINE : "ruby"}/#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}",
+          "platform" => RUBY_PLATFORM
+        }.to_a.flatten.map { |v| v.to_s.gsub(/\s+/, "_") }.join(" ")
+
+        send_with_reply_timeout "hello #{hello_options}"
+        send_with_reply_timeout "authenticate #{@api_key}"
+
+        loop do
+          command_and_args, command_options = @sender_queue.pop
+          if command_and_args
+            sync_resource = command_options && command_options[:sync_resource]
+            test_connection
+            case command_and_args
+            when 'exit'
+              logger.info "Exiting, #{@sender_queue.size} commands remain"
+              return true
+            when 'flush'
+              release_resource = true
+            when EventAggregator
+              command_and_args.values.values.each do |command|
+                logger.debug "Sending: #{command}"
+                @socket.puts command
+              end
+            else
+              logger.debug "Sending: #{command_and_args}"
+              @socket.puts command_and_args
+            end
+            command_and_args = nil
+            command_options = nil
+            if sync_resource
+              @sync_mutex.synchronize do
+                sync_resource.signal
+              end
            end
          end
        end
-
-
-
-
-      when EOFError
+      rescue Exception => err
+        allow_reconnect = @allow_reconnect
+        case err
+        when EOFError
          # nop
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        when Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::EADDRINUSE, Timeout::Error, OpenSSL::SSL::SSLError
+          # If the connection has been refused by Instrumental
+          # or we cannot reach the server
+          # or the connection state of this socket is in a race
+          # or SSL is not functioning properly for some reason
+          logger.error "unable to connect to Instrumental, hanging up with #{@sender_queue.size} messages remaining"
+          logger.debug "Exception: #{err.inspect}\n#{err.backtrace.join("\n")}"
+          allow_reconnect = false
+        else
+          report_exception(err)
+        end
+        if allow_reconnect == false ||
+            (command_options && command_options[:allow_reconnect] == false)
+          logger.info "Not trying to reconnect"
+          @failures = 0
+          return
+        end
+        if command_and_args
+          logger.debug "requeueing: #{command_and_args}"
+          @sender_queue << command_and_args
+        end
+        disconnect
+        @failures += 1
+        delay = [(@failures - 1) ** BACKOFF, MAX_RECONNECT_DELAY].min
+        logger.error "disconnected, #{@failures} failures in a row, reconnect in #{delay}..."
+        sleep delay
+        retry
+      ensure
+        disconnect
       end
-      disconnect
-      @failures += 1
-      delay = [(@failures - 1) ** BACKOFF, MAX_RECONNECT_DELAY].min
-      logger.error "disconnected, #{@failures} failures in a row, reconnect in #{delay}..."
-      sleep delay
-      retry
-    ensure
-      disconnect
     end
 
     def setup_cleanup_at_exit
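Side note (not part of the diff): the aggregator loop does not sleep a fixed interval; it waits until the next wall-clock multiple of `frequency`, so flushes align on predictable boundaries. The wait calculation from the hunk above, restated on its own:

```ruby
frequency = 15
now       = Time.now.to_i

# Next wall-clock instant that is an exact multiple of `frequency` seconds.
next_boundary = (now - (now % frequency)) + frequency

# Wait at most until that boundary, never a negative duration.
time_to_wait = [next_boundary - Time.now.to_f, 0].max
```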
@@ -511,7 +634,11 @@ module Instrumental
     end
 
     def running?
-      !@
+      !@sender_thread.nil? &&
+        !@aggregator_thread.nil? &&
+        @pid == Process.pid &&
+        @sender_thread.alive? &&
+        @aggregator_thread.alive?
     end
 
     def flush_socket(socket)
@@ -541,18 +668,5 @@ module Instrumental
     def allows_secure?
       defined?(OpenSSL)
     end
-
-    def certificates
-      if allows_secure?
-        base_dir = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."))
-        %w{equifax geotrust rapidssl}.map do |name|
-          OpenSSL::X509::Certificate.new(File.open(File.join(base_dir, "certs", "#{name}.ca.pem")))
-        end
-      else
-        []
-      end
-    end
-
   end
-
 end