instrumental_agent 2.0.0.alpha → 3.0.0.beta2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.ruby-version +1 -1
- data/.travis.yml +4 -5
- data/CHANGELOG.md +18 -0
- data/Gemfile +1 -6
- data/README.md +22 -0
- data/instrumental_agent.gemspec +2 -2
- data/lib/instrumental/agent.rb +277 -163
- data/lib/instrumental/capistrano.rb +4 -46
- data/lib/instrumental/capistrano/capistrano2.rb +47 -0
- data/lib/instrumental/capistrano/capistrano3.rake +56 -0
- data/lib/instrumental/command_structs.rb +32 -0
- data/lib/instrumental/event_aggregator.rb +26 -0
- data/lib/instrumental/version.rb +1 -1
- data/spec/agent_spec.rb +436 -43
- data/spec/command_struct_specs.rb +20 -0
- data/spec/event_aggregator_spec.rb +53 -0
- data/spec/spec_helper.rb +9 -0
- metadata +34 -33
- data/certs/equifax.ca.pem +0 -69
- data/certs/geotrust.ca.pem +0 -80
- data/certs/rapidssl.ca.pem +0 -94
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a86fe597ab71c9660d590e42ef37e0d9012a4f5adbe7742d265f3d20ffda1b9e
|
4
|
+
data.tar.gz: cd56273db7523852982b58162361359fc3c9fcd3b8cae19bf87387bf63fe26b1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c68427fe4c900db650e45b46423c808263d86df275b9f49b44896973f4db3e25cd61b8fe9a0b67cba54acbe831b4c00a7cbfb8c7b7d6cc30959159e9e149d3dd
|
7
|
+
data.tar.gz: 57f5554f876c9311d76305c60b754af1c7d1a8bb63b8682355217b014b927e1d0f500bea37daff954d5149d4015b1be80793f9f2ed8f713928b4af4d428e576f
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.6.3
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,21 @@
|
|
1
|
+
### 3.0.0.beta [October 8, 2020]
|
2
|
+
* Drop support for outdated versions of Ruby
|
3
|
+
* Explicitly add support for new versions of Ruby
|
4
|
+
* Add support for client-side aggregation
|
5
|
+
* Note: the agent API has NOT changed. This is a major release because of the significant changes in Ruby versions officially supported.
|
6
|
+
|
7
|
+
### 3.0.0.alpha [August 22, 2019]
|
8
|
+
* Drop support for outdated versions of Ruby
|
9
|
+
* Explicitly add support for new versions of Ruby
|
10
|
+
* Better handling of SSL errors when connecting to Instrumental
|
11
|
+
* Note: the agent API has NOT changed. This is a major release because of the significant changes in Ruby versions officially supported.
|
12
|
+
|
13
|
+
### 2.1.0 [January 19, 2018]
|
14
|
+
* Add support for capistrano 3
|
15
|
+
|
16
|
+
### 2.0.0 [August 21, 2017]
|
17
|
+
* Add automatic tracking of common application metrics, official release
|
18
|
+
|
1
19
|
### 2.0.0.alpha [August 18, 2017]
|
2
20
|
* Add automatic tracking of common application metrics
|
3
21
|
|
data/Gemfile
CHANGED
@@ -1,11 +1,6 @@
|
|
1
1
|
source "https://rubygems.org"
|
2
2
|
|
3
3
|
gemspec
|
4
|
-
ruby_engine = defined?(RUBY_ENGINE) && RUBY_ENGINE
|
5
|
-
if RUBY_VERSION < "1.9" && !%w{jruby rbx}.include?(ruby_engine)
|
6
|
-
# Built and installed via ext/mkrf_conf.rb
|
7
|
-
gem 'system_timer', '~> 1.2'
|
8
|
-
end
|
9
4
|
|
10
5
|
# fixes 2.3.0 ffi bundle error
|
11
|
-
gem 'ffi', '~> 1.0.11'
|
6
|
+
gem 'ffi', '~> 1.0.11'
|
data/README.md
CHANGED
@@ -59,6 +59,17 @@ User.find_each do |user|
|
|
59
59
|
end
|
60
60
|
```
|
61
61
|
|
62
|
+
## Aggregation
|
63
|
+
Aggregation collects more data on your system before sending it to Instrumental. This reduces the total amount of data being sent, at the cost of a small amount of additional latency. You can control this feature with the frequency parameter:
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
I = Instrumental::Agent.new('PROJECT_API_TOKEN', :frequency => 15) # send data every 15 seconds
|
67
|
+
I.frequency = 6 # send batches of data every 6 seconds
|
68
|
+
```
|
69
|
+
|
70
|
+
The agent may send data more frequently if you are sending a large number of different metrics. Values between 3 and 15 are generally reasonable. If you want to disable this behavior and send every metric as fast as possible, set frequency to zero or nil. Note that a frequency of zero will still use a separate thread for performance - it is NOT the same as synchronous mode.
|
71
|
+
|
72
|
+
|
62
73
|
## Server Metrics
|
63
74
|
|
64
75
|
Want server stats like load, memory, etc.? Check out [InstrumentalD](https://github.com/instrumental/instrumentald).
|
@@ -109,6 +120,17 @@ I = Instrumental::Agent.new('PROJECT_API_TOKEN',
|
|
109
120
|
)
|
110
121
|
```
|
111
122
|
|
123
|
+
### Upgrading from 2.x
|
124
|
+
|
125
|
+
Agent version 3.x drops support for some older Ruby versions, but should otherwise be a drop-in replacement. If you wish to enable Aggregation, create the agent with the frequency option set to the number of seconds you would like to wait between flushes. For example:
|
126
|
+
|
127
|
+
```
|
128
|
+
I = Instrumental::Agent.new('PROJECT_API_TOKEN',
|
129
|
+
:enabled => Rails.env.production?,
|
130
|
+
:frequency => 15
|
131
|
+
)
|
132
|
+
```
|
133
|
+
|
112
134
|
## Troubleshooting & Help
|
113
135
|
|
114
136
|
We are here to help. Email us at [support@instrumentalapp.com](mailto:support@instrumentalapp.com).
|
data/instrumental_agent.gemspec
CHANGED
@@ -4,13 +4,13 @@ require "instrumental/version"
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "instrumental_agent"
|
6
6
|
s.version = Instrumental::VERSION
|
7
|
-
s.authors = ["
|
7
|
+
s.authors = ["Expected Behavior"]
|
8
8
|
s.email = ["support@instrumentalapp.com"]
|
9
9
|
s.homepage = "http://github.com/instrumental/instrumental_agent-ruby"
|
10
10
|
s.summary = %q{Custom metric monitoring for Ruby applications via Instrumental}
|
11
11
|
s.description = %q{This agent supports Instrumental custom metric monitoring for Ruby applications. It provides high-data reliability at high scale, without ever blocking your process or causing an exception.}
|
12
12
|
s.license = "MIT"
|
13
|
-
s.required_ruby_version = '>= 2.
|
13
|
+
s.required_ruby_version = '>= 2.5.7'
|
14
14
|
|
15
15
|
s.files = `git ls-files`.split("\n")
|
16
16
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
data/lib/instrumental/agent.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
require 'instrumental/version'
|
2
2
|
require 'instrumental/system_timer'
|
3
|
+
require 'instrumental/command_structs'
|
4
|
+
require 'instrumental/event_aggregator'
|
3
5
|
require 'logger'
|
4
6
|
require 'openssl' rescue nil
|
5
7
|
require 'resolv'
|
@@ -15,14 +17,17 @@ module Instrumental
|
|
15
17
|
EXIT_FLUSH_TIMEOUT = 5
|
16
18
|
HOSTNAME = Socket.gethostbyname(Socket.gethostname).first rescue Socket.gethostname
|
17
19
|
MAX_BUFFER = 5000
|
20
|
+
MAX_AGGREGATOR_SIZE = 5000
|
18
21
|
MAX_RECONNECT_DELAY = 15
|
19
22
|
REPLY_TIMEOUT = 10
|
20
23
|
RESOLUTION_FAILURES_BEFORE_WAITING = 3
|
21
24
|
RESOLUTION_WAIT = 30
|
22
25
|
RESOLVE_TIMEOUT = 1
|
26
|
+
DEFAULT_FREQUENCY = 0
|
27
|
+
VALID_FREQUENCIES = [0, 1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30, 60]
|
23
28
|
|
24
29
|
|
25
|
-
attr_accessor :host, :port, :synchronous, :
|
30
|
+
attr_accessor :host, :port, :synchronous, :frequency, :sender_queue, :aggregator_queue, :dns_resolutions, :last_connect_at
|
26
31
|
attr_reader :connection, :enabled, :secure
|
27
32
|
|
28
33
|
def self.logger=(l)
|
@@ -52,6 +57,7 @@ module Instrumental
|
|
52
57
|
# port: 8001
|
53
58
|
# enabled: true
|
54
59
|
# synchronous: false
|
60
|
+
# frequency: 10
|
55
61
|
# secure: true
|
56
62
|
# verify: true
|
57
63
|
@api_key = api_key
|
@@ -73,14 +79,23 @@ module Instrumental
|
|
73
79
|
@port = (@port || default_port).to_i
|
74
80
|
@enabled = options.has_key?(:enabled) ? !!options[:enabled] : true
|
75
81
|
@synchronous = !!options[:synchronous]
|
82
|
+
|
83
|
+
if options.has_key?(:frequency)
|
84
|
+
self.frequency = options[:frequency]
|
85
|
+
else
|
86
|
+
self.frequency = DEFAULT_FREQUENCY
|
87
|
+
end
|
88
|
+
|
89
|
+
@metrician = options[:metrician].nil? ? true : !!options[:metrician]
|
76
90
|
@pid = Process.pid
|
77
91
|
@allow_reconnect = true
|
78
|
-
@certs = certificates
|
79
92
|
@dns_resolutions = 0
|
80
93
|
@last_connect_at = 0
|
81
|
-
|
94
|
+
|
82
95
|
@start_worker_mutex = Mutex.new
|
83
|
-
@
|
96
|
+
@aggregator_queue = Queue.new
|
97
|
+
@sender_queue = Queue.new
|
98
|
+
|
84
99
|
|
85
100
|
setup_cleanup_at_exit if @enabled
|
86
101
|
|
@@ -94,7 +109,9 @@ module Instrumental
|
|
94
109
|
# agent.gauge('load', 1.23)
|
95
110
|
def gauge(metric, value, time = Time.now, count = 1)
|
96
111
|
if valid?(metric, value, time, count) &&
|
97
|
-
|
112
|
+
send_command(Instrumental::Command.new("gauge".freeze, metric, value, time, count))
|
113
|
+
# tempted to "gauge" this to a symbol? Don't. Frozen strings are very fast,
|
114
|
+
# and later we're going to to_s every one of these anyway.
|
98
115
|
value
|
99
116
|
else
|
100
117
|
nil
|
@@ -142,7 +159,7 @@ module Instrumental
|
|
142
159
|
# agent.increment('users')
|
143
160
|
def increment(metric, value = 1, time = Time.now, count = 1)
|
144
161
|
if valid?(metric, value, time, count) &&
|
145
|
-
|
162
|
+
send_command(Instrumental::Command.new("increment".freeze, metric, value, time, count))
|
146
163
|
value
|
147
164
|
else
|
148
165
|
nil
|
@@ -157,7 +174,7 @@ module Instrumental
|
|
157
174
|
# agent.notice('A notice')
|
158
175
|
def notice(note, time = Time.now, duration = 0)
|
159
176
|
if valid_note?(note)
|
160
|
-
send_command(
|
177
|
+
send_command(Instrumental::Notice.new(note, time, duration))
|
161
178
|
note
|
162
179
|
else
|
163
180
|
nil
|
@@ -196,6 +213,22 @@ module Instrumental
|
|
196
213
|
@logger || self.class.logger
|
197
214
|
end
|
198
215
|
|
216
|
+
def frequency=(frequency)
|
217
|
+
freq = frequency.to_i
|
218
|
+
if !VALID_FREQUENCIES.include?(freq)
|
219
|
+
logger.warn "Frequency must be a value that divides evenly into 60: 1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30, or 60."
|
220
|
+
# this will make all negative numbers and nils into 0s
|
221
|
+
freq = VALID_FREQUENCIES.select{ |f| f < freq }.max.to_i
|
222
|
+
end
|
223
|
+
|
224
|
+
@frequency = if(@synchronous)
|
225
|
+
logger.warn "Synchronous and Frequency should not be enabled at the same time! Defaulting to synchronous mode."
|
226
|
+
0
|
227
|
+
else
|
228
|
+
freq
|
229
|
+
end
|
230
|
+
end
|
231
|
+
|
199
232
|
# Stopping the agent will immediately stop all communication
|
200
233
|
# to Instrumental. If you call this and submit another metric,
|
201
234
|
# the agent will start again.
|
@@ -207,12 +240,19 @@ module Instrumental
|
|
207
240
|
#
|
208
241
|
def stop
|
209
242
|
disconnect
|
210
|
-
if @
|
211
|
-
@
|
212
|
-
@
|
243
|
+
if @sender_thread
|
244
|
+
@sender_thread.kill
|
245
|
+
@sender_thread = nil
|
246
|
+
end
|
247
|
+
if @aggregator_thread
|
248
|
+
@aggregator_thread.kill
|
249
|
+
@aggregator_thread = nil
|
213
250
|
end
|
214
|
-
if @
|
215
|
-
@
|
251
|
+
if @sender_queue
|
252
|
+
@sender_queue.clear
|
253
|
+
end
|
254
|
+
if @aggregator_queue
|
255
|
+
@aggregator_queue.clear
|
216
256
|
end
|
217
257
|
end
|
218
258
|
|
@@ -222,18 +262,25 @@ module Instrumental
|
|
222
262
|
# where at_exit is bypassed like Resque workers.
|
223
263
|
def cleanup
|
224
264
|
if running?
|
225
|
-
logger.info "Cleaning up agent,
|
265
|
+
logger.info "Cleaning up agent, aggregator_size: #{@aggregator_queue.size}, thread_running: #{@aggregator_thread.alive?}"
|
266
|
+
logger.info "Cleaning up agent, queue size: #{@sender_queue.size}, thread running: #{@sender_thread.alive?}"
|
226
267
|
@allow_reconnect = false
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
268
|
+
begin
|
269
|
+
with_timeout(EXIT_FLUSH_TIMEOUT) do
|
270
|
+
@aggregator_queue << ['exit']
|
271
|
+
@aggregator_thread.join
|
272
|
+
@sender_queue << ['exit']
|
273
|
+
@sender_thread.join
|
274
|
+
end
|
275
|
+
rescue Timeout::Error
|
276
|
+
total_size = @sender_queue&.size.to_i +
|
277
|
+
@aggregator_queue&.size.to_i +
|
278
|
+
@event_aggregator&.size.to_i
|
279
|
+
|
280
|
+
if total_size > 0
|
281
|
+
logger.error "Timed out working agent thread on exit, dropping #{total_size} metrics"
|
282
|
+
else
|
283
|
+
logger.error "Timed out Instrumental Agent, exiting"
|
237
284
|
end
|
238
285
|
end
|
239
286
|
end
|
@@ -271,7 +318,8 @@ module Instrumental
|
|
271
318
|
end
|
272
319
|
|
273
320
|
def report_exception(e)
|
274
|
-
|
321
|
+
# puts "--- Exception of type #{e.class} occurred:\n#{e.message}\n#{e.backtrace.join("\n")}"
|
322
|
+
logger.error "Exception of type #{e.class} occurred:\n#{e.message}\n#{e.backtrace.join("\n")}"
|
275
323
|
end
|
276
324
|
|
277
325
|
def ipv4_address_for_host(host, port, moment_to_connect = Time.now.to_i)
|
@@ -291,44 +339,41 @@ module Instrumental
|
|
291
339
|
nil
|
292
340
|
end
|
293
341
|
|
294
|
-
def send_command(
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
queue_message(cmd, { :synchronous => @synchronous })
|
303
|
-
else
|
304
|
-
if !@queue_full_warning
|
305
|
-
@queue_full_warning = true
|
306
|
-
logger.warn "Queue full(#{@queue.size}), dropping commands..."
|
307
|
-
end
|
308
|
-
logger.debug "Dropping command, queue full(#{@queue.size}): #{cmd.chomp}"
|
309
|
-
nil
|
310
|
-
end
|
342
|
+
def send_command(command)
|
343
|
+
return logger.debug(command.to_s) unless enabled?
|
344
|
+
start_workers
|
345
|
+
critical_queue = frequency.to_i == 0 ? @sender_queue : @aggregator_queue
|
346
|
+
if critical_queue && critical_queue.size < MAX_BUFFER
|
347
|
+
@queue_full_warning = false
|
348
|
+
logger.debug "Queueing: #{command.to_s}"
|
349
|
+
queue_message(command, { :synchronous => @synchronous })
|
311
350
|
else
|
312
|
-
|
351
|
+
if !@queue_full_warning
|
352
|
+
@queue_full_warning = true
|
353
|
+
logger.warn "Queue full(#{critical_queue.size}), dropping commands..."
|
354
|
+
end
|
355
|
+
logger.debug "Dropping command, queue full(#{critical_queue.size}): #{command.to_s}"
|
356
|
+
nil
|
313
357
|
end
|
314
358
|
end
|
315
359
|
|
316
360
|
def queue_message(message, options = {})
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
@
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
361
|
+
return message unless enabled?
|
362
|
+
|
363
|
+
# imagine it's a reverse merge, but with fewer allocations
|
364
|
+
options[:allow_reconnect] = @allow_reconnect unless options.has_key?(:allow_reconnect)
|
365
|
+
|
366
|
+
if options.delete(:synchronous)
|
367
|
+
options[:sync_resource] ||= ConditionVariable.new
|
368
|
+
@sync_mutex.synchronize {
|
369
|
+
queue = message == "flush" ? @aggregator_queue : @sender_queue
|
370
|
+
queue << [message, options]
|
371
|
+
options[:sync_resource].wait(@sync_mutex)
|
372
|
+
}
|
373
|
+
elsif frequency.to_i == 0
|
374
|
+
@sender_queue << [message, options]
|
375
|
+
else
|
376
|
+
@aggregator_queue << [message, options]
|
332
377
|
end
|
333
378
|
message
|
334
379
|
end
|
@@ -350,31 +395,15 @@ module Instrumental
|
|
350
395
|
|
351
396
|
def test_connection
|
352
397
|
begin
|
353
|
-
|
354
|
-
# on Ruby 1.8.6, 1.8.7 or 1.9.1, read_nonblock does not exist,
|
355
|
-
# and so the case of testing socket liveliness via a nonblocking
|
356
|
-
# read that catches a wait condition won't work.
|
357
|
-
#
|
358
|
-
# We grab the SSL socket's underlying IO object and perform the
|
359
|
-
# non blocking read there in order to ensure the socket is still
|
360
|
-
# valid
|
361
|
-
if @socket.respond_to?(:read_nonblock)
|
362
|
-
@socket.read_nonblock(1)
|
363
|
-
elsif @socket.respond_to?(:io)
|
364
|
-
# The SSL Socket may send down additional data at close time,
|
365
|
-
# so we perform two nonblocking reads, one to pull any pending
|
366
|
-
# data on the socket, and the second to actually perform the connection
|
367
|
-
# liveliness test
|
368
|
-
@socket.io.read_nonblock(1024) && @socket.io.read_nonblock(1024)
|
369
|
-
end
|
398
|
+
@socket.read_nonblock(1)
|
370
399
|
rescue *wait_exceptions
|
371
400
|
# noop
|
372
401
|
end
|
373
402
|
end
|
374
403
|
|
375
|
-
def
|
404
|
+
def start_workers
|
376
405
|
# NOTE: We need a mutex around both `running?` and thread creation,
|
377
|
-
# otherwise we could create
|
406
|
+
# otherwise we could create too many threads.
|
378
407
|
# Return early and queue the message if another thread is
|
379
408
|
# starting the worker.
|
380
409
|
return if !@start_worker_mutex.try_lock
|
@@ -384,13 +413,34 @@ module Instrumental
|
|
384
413
|
disconnect
|
385
414
|
address = ipv4_address_for_host(@host, @port)
|
386
415
|
if address
|
387
|
-
@pid
|
416
|
+
new_pid = if @pid != Process.pid
|
417
|
+
@pid = Process.pid
|
418
|
+
true
|
419
|
+
else
|
420
|
+
false
|
421
|
+
end
|
422
|
+
|
388
423
|
@sync_mutex = Mutex.new
|
389
424
|
@failures = 0
|
390
425
|
@sockaddr_in = Socket.pack_sockaddr_in(@port, address)
|
391
|
-
|
392
|
-
|
393
|
-
|
426
|
+
|
427
|
+
logger.info "Starting aggregator thread"
|
428
|
+
if !@aggregator_thread&.alive?
|
429
|
+
if new_pid
|
430
|
+
@event_aggregator = nil
|
431
|
+
@aggregator_queue = Queue.new
|
432
|
+
end
|
433
|
+
@aggregator_thread = Thread.new do
|
434
|
+
run_aggregator_loop
|
435
|
+
end
|
436
|
+
end
|
437
|
+
|
438
|
+
if !@sender_thread&.alive?
|
439
|
+
logger.info "Starting sender thread"
|
440
|
+
@sender_queue = Queue.new if new_pid
|
441
|
+
@sender_thread = Thread.new do
|
442
|
+
run_sender_loop
|
443
|
+
end
|
394
444
|
end
|
395
445
|
end
|
396
446
|
ensure
|
@@ -426,82 +476,155 @@ module Instrumental
|
|
426
476
|
sock
|
427
477
|
end
|
428
478
|
|
429
|
-
def
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
479
|
+
def run_aggregator_loop
|
480
|
+
# if the sender queue is some level of full, should we keep aggregating until it empties out?
|
481
|
+
# what does this mean for aggregation slices - aggregating to nearest frequency will
|
482
|
+
# make the object needlessly larger, when minute resolution is what we have on the server
|
483
|
+
begin
|
484
|
+
loop do
|
485
|
+
now = Time.now.to_i
|
486
|
+
time_to_wait = if frequency == 0
|
487
|
+
0
|
488
|
+
else
|
489
|
+
next_frequency = (now - (now % frequency)) + frequency
|
490
|
+
time_to_wait = [(next_frequency - Time.now.to_f), 0].max
|
491
|
+
end
|
492
|
+
|
493
|
+
command_and_args, command_options = if @event_aggregator&.size.to_i > MAX_AGGREGATOR_SIZE
|
494
|
+
logger.info "Aggregator full, flushing early with #{MAX_AGGREGATOR_SIZE} metrics."
|
495
|
+
command_and_args, command_options = ['forward', {}]
|
496
|
+
else
|
497
|
+
begin
|
498
|
+
with_timeout(time_to_wait) do
|
499
|
+
@aggregator_queue.pop
|
500
|
+
end
|
501
|
+
rescue Timeout::Error
|
502
|
+
['forward', {}]
|
503
|
+
end
|
504
|
+
end
|
505
|
+
if command_and_args
|
506
|
+
case command_and_args
|
507
|
+
when 'exit'
|
508
|
+
if !@event_aggregator.nil?
|
509
|
+
@sender_queue << @event_aggregator
|
510
|
+
@event_aggregator = nil
|
511
|
+
end
|
512
|
+
logger.info "Exiting, #{@aggregator_queue.size} commands remain"
|
513
|
+
return true
|
514
|
+
when 'flush'
|
515
|
+
if !@event_aggregator.nil?
|
516
|
+
@sender_queue << @event_aggregator
|
517
|
+
@event_aggregator = nil
|
518
|
+
end
|
519
|
+
@sender_queue << ['flush', command_options]
|
520
|
+
when 'forward'
|
521
|
+
if !@event_aggregator.nil?
|
522
|
+
next if @sender_queue.size > 0 && @sender_queue.num_waiting < 1
|
523
|
+
@sender_queue << @event_aggregator
|
524
|
+
@event_aggregator = nil
|
525
|
+
end
|
526
|
+
when Notice
|
527
|
+
@sender_queue << [command_and_args, command_options]
|
528
|
+
else
|
529
|
+
@event_aggregator = EventAggregator.new(frequency: @frequency) if @event_aggregator.nil?
|
530
|
+
|
531
|
+
logger.debug "Sending: #{command_and_args} to aggregator"
|
532
|
+
@event_aggregator.put(command_and_args)
|
533
|
+
end
|
534
|
+
command_and_args = nil
|
535
|
+
command_options = nil
|
536
|
+
end
|
537
|
+
end
|
538
|
+
rescue Exception => err
|
539
|
+
report_exception(err)
|
435
540
|
end
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
"hostname" => HOSTNAME,
|
440
|
-
"pid" => Process.pid,
|
441
|
-
"runtime" => "#{defined?(RUBY_ENGINE) ? RUBY_ENGINE : "ruby"}/#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}",
|
442
|
-
"platform" => RUBY_PLATFORM
|
443
|
-
}.to_a.flatten.map { |v| v.to_s.gsub(/\s+/, "_") }.join(" ")
|
444
|
-
|
445
|
-
send_with_reply_timeout "hello #{hello_options}"
|
446
|
-
send_with_reply_timeout "authenticate #{@api_key}"
|
541
|
+
end
|
542
|
+
|
543
|
+
def run_sender_loop
|
447
544
|
@failures = 0
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
545
|
+
begin
|
546
|
+
logger.info "connecting to collector"
|
547
|
+
command_and_args = nil
|
548
|
+
command_options = nil
|
549
|
+
with_timeout(CONNECT_TIMEOUT) do
|
550
|
+
@socket = open_socket(@sockaddr_in, @secure, @verify_cert)
|
551
|
+
end
|
552
|
+
logger.info "connected to collector at #{host}:#{port}"
|
553
|
+
hello_options = {
|
554
|
+
"version" => "ruby/instrumental_agent/#{VERSION}",
|
555
|
+
"hostname" => HOSTNAME,
|
556
|
+
"pid" => Process.pid,
|
557
|
+
"runtime" => "#{defined?(RUBY_ENGINE) ? RUBY_ENGINE : "ruby"}/#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}",
|
558
|
+
"platform" => RUBY_PLATFORM
|
559
|
+
}.to_a.flatten.map { |v| v.to_s.gsub(/\s+/, "_") }.join(" ")
|
560
|
+
|
561
|
+
send_with_reply_timeout "hello #{hello_options}"
|
562
|
+
send_with_reply_timeout "authenticate #{@api_key}"
|
563
|
+
|
564
|
+
loop do
|
565
|
+
command_and_args, command_options = @sender_queue.pop
|
566
|
+
if command_and_args
|
567
|
+
sync_resource = command_options && command_options[:sync_resource]
|
568
|
+
test_connection
|
569
|
+
case command_and_args
|
570
|
+
when 'exit'
|
571
|
+
logger.info "Exiting, #{@sender_queue.size} commands remain"
|
572
|
+
return true
|
573
|
+
when 'flush'
|
574
|
+
release_resource = true
|
575
|
+
when EventAggregator
|
576
|
+
command_and_args.values.values.each do |command|
|
577
|
+
logger.debug "Sending: #{command}"
|
578
|
+
@socket.puts command
|
579
|
+
end
|
580
|
+
else
|
581
|
+
logger.debug "Sending: #{command_and_args}"
|
582
|
+
@socket.puts command_and_args
|
583
|
+
end
|
584
|
+
command_and_args = nil
|
585
|
+
command_options = nil
|
586
|
+
if sync_resource
|
587
|
+
@sync_mutex.synchronize do
|
588
|
+
sync_resource.signal
|
589
|
+
end
|
468
590
|
end
|
469
591
|
end
|
470
592
|
end
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
when EOFError
|
593
|
+
rescue Exception => err
|
594
|
+
allow_reconnect = @allow_reconnect
|
595
|
+
case err
|
596
|
+
when EOFError
|
476
597
|
# nop
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
598
|
+
when Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::EADDRINUSE, Timeout::Error, OpenSSL::SSL::SSLError
|
599
|
+
# If the connection has been refused by Instrumental
|
600
|
+
# or we cannot reach the server
|
601
|
+
# or the connection state of this socket is in a race
|
602
|
+
# or SSL is not functioning properly for some reason
|
603
|
+
logger.error "unable to connect to Instrumental, hanging up with #{@sender_queue.size} messages remaining"
|
604
|
+
logger.debug "Exception: #{err.inspect}\n#{err.backtrace.join("\n")}"
|
605
|
+
allow_reconnect = false
|
606
|
+
else
|
607
|
+
report_exception(err)
|
608
|
+
end
|
609
|
+
if allow_reconnect == false ||
|
610
|
+
(command_options && command_options[:allow_reconnect] == false)
|
611
|
+
logger.info "Not trying to reconnect"
|
612
|
+
@failures = 0
|
613
|
+
return
|
614
|
+
end
|
615
|
+
if command_and_args
|
616
|
+
logger.debug "requeueing: #{command_and_args}"
|
617
|
+
@sender_queue << command_and_args
|
618
|
+
end
|
619
|
+
disconnect
|
620
|
+
@failures += 1
|
621
|
+
delay = [(@failures - 1) ** BACKOFF, MAX_RECONNECT_DELAY].min
|
622
|
+
logger.error "disconnected, #{@failures} failures in a row, reconnect in #{delay}..."
|
623
|
+
sleep delay
|
624
|
+
retry
|
625
|
+
ensure
|
626
|
+
disconnect
|
496
627
|
end
|
497
|
-
disconnect
|
498
|
-
@failures += 1
|
499
|
-
delay = [(@failures - 1) ** BACKOFF, MAX_RECONNECT_DELAY].min
|
500
|
-
logger.error "disconnected, #{@failures} failures in a row, reconnect in #{delay}..."
|
501
|
-
sleep delay
|
502
|
-
retry
|
503
|
-
ensure
|
504
|
-
disconnect
|
505
628
|
end
|
506
629
|
|
507
630
|
def setup_cleanup_at_exit
|
@@ -511,7 +634,11 @@ module Instrumental
|
|
511
634
|
end
|
512
635
|
|
513
636
|
def running?
|
514
|
-
!@
|
637
|
+
!@sender_thread.nil? &&
|
638
|
+
!@aggregator_thread.nil? &&
|
639
|
+
@pid == Process.pid &&
|
640
|
+
@sender_thread.alive? &&
|
641
|
+
@aggregator_thread.alive?
|
515
642
|
end
|
516
643
|
|
517
644
|
def flush_socket(socket)
|
@@ -541,18 +668,5 @@ module Instrumental
|
|
541
668
|
def allows_secure?
|
542
669
|
defined?(OpenSSL)
|
543
670
|
end
|
544
|
-
|
545
|
-
def certificates
|
546
|
-
if allows_secure?
|
547
|
-
base_dir = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."))
|
548
|
-
%w{equifax geotrust rapidssl}.map do |name|
|
549
|
-
OpenSSL::X509::Certificate.new(File.open(File.join(base_dir, "certs", "#{name}.ca.pem")))
|
550
|
-
end
|
551
|
-
else
|
552
|
-
[]
|
553
|
-
end
|
554
|
-
end
|
555
|
-
|
556
671
|
end
|
557
|
-
|
558
672
|
end
|