instrumental_agent 3.0.0.alpha → 3.0.0.beta
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -2
- data/CHANGELOG.md +6 -0
- data/README.md +22 -0
- data/instrumental_agent.gemspec +1 -1
- data/lib/instrumental/agent.rb +191 -65
- data/lib/instrumental/command_structs.rb +32 -0
- data/lib/instrumental/event_aggregator.rb +26 -0
- data/lib/instrumental/version.rb +1 -1
- data/spec/agent_spec.rb +332 -35
- data/spec/command_struct_specs.rb +20 -0
- data/spec/event_aggregator_spec.rb +53 -0
- metadata +12 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a1a781a0abec98e8c737a57fac5134fe2ee847879949fc5134ced2c0b0136076
|
4
|
+
data.tar.gz: 89950c6e0b59713dc6034be99acfc52473ef049b63824685694626d049a85b8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca82be6040189400c0e01513d517202e8d27d900ad2f10e5558025cad6a499e49985ee58c95efdf90d24e2fd0174b9349c7cd6ac8f906325f2de766c0f14cba3
|
7
|
+
data.tar.gz: 742200da53f676a9341a795f5f8145416650c033466782348fc4852dc62e656e9fed4e2be02c0829c2ec946092e7dfcd7d836cc50e731a6ceb8cb2493b76a1b6
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
### 3.0.0.beta [October 8, 2020]
|
2
|
+
* Drop support for outdated versions of Ruby
|
3
|
+
* Explicitly add support for new versions of Ruby
|
4
|
+
* Add support for client-side aggregation
|
5
|
+
* Note: the agent API has NOT changed. This is a major release because of the significant changes in Ruby versions officially supported.
|
6
|
+
|
1
7
|
### 3.0.0.alpha [August 22, 2019]
|
2
8
|
* Drop support for outdated versions of Ruby
|
3
9
|
* Explicitly add support for new versions of Ruby
|
data/README.md
CHANGED
@@ -59,6 +59,17 @@ User.find_each do |user|
|
|
59
59
|
end
|
60
60
|
```
|
61
61
|
|
62
|
+
## Aggregation
|
63
|
+
Aggregation collects more data on your system before sending it to Instrumental. This reduces the total amount of data being sent, at the cost of a small amount of additional latency. You can control this feature with the frequency parameter:
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
I = Instrumental::Agent.new('PROJECT_API_TOKEN', :frequency => 15) # send data every 15 seconds
|
67
|
+
I.frequency = 6 # send batches of data every 6 seconds
|
68
|
+
```
|
69
|
+
|
70
|
+
The agent may send data more frequently if you are sending a large number of different metrics. Values between 3 and 15 are generally reasonable. If you want to disable this behavior and send every metric as fast as possible, set frequency to zero or nil. Note that a frequency of zero will still use a separate thread for performance - it is NOT the same as synchronous mode.
|
71
|
+
|
72
|
+
|
62
73
|
## Server Metrics
|
63
74
|
|
64
75
|
Want server stats like load, memory, etc.? Check out [InstrumentalD](https://github.com/instrumental/instrumentald).
|
@@ -109,6 +120,17 @@ I = Instrumental::Agent.new('PROJECT_API_TOKEN',
|
|
109
120
|
)
|
110
121
|
```
|
111
122
|
|
123
|
+
### Upgrading from 2.x
|
124
|
+
|
125
|
+
Agent version 3.x drops support for some older rubies, but should otherwise be a drop-in replacement. If you wish to enable Aggregation, enable the agent with the frequency option set to the number of seconds you would like to wait between flushes. For example:
|
126
|
+
|
127
|
+
```
|
128
|
+
I = Instrumental::Agent.new('PROJECT_API_TOKEN',
|
129
|
+
:enabled => Rails.env.production?,
|
130
|
+
:frequency => 15
|
131
|
+
)
|
132
|
+
```
|
133
|
+
|
112
134
|
## Troubleshooting & Help
|
113
135
|
|
114
136
|
We are here to help. Email us at [support@instrumentalapp.com](mailto:support@instrumentalapp.com).
|
data/instrumental_agent.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.summary = %q{Custom metric monitoring for Ruby applications via Instrumental}
|
11
11
|
s.description = %q{This agent supports Instrumental custom metric monitoring for Ruby applications. It provides high-data reliability at high scale, without ever blocking your process or causing an exception.}
|
12
12
|
s.license = "MIT"
|
13
|
-
s.required_ruby_version = '>= 2.
|
13
|
+
s.required_ruby_version = '>= 2.5.7'
|
14
14
|
|
15
15
|
s.files = `git ls-files`.split("\n")
|
16
16
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
data/lib/instrumental/agent.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
require 'instrumental/version'
|
2
2
|
require 'instrumental/system_timer'
|
3
|
+
require 'instrumental/command_structs'
|
4
|
+
require 'instrumental/event_aggregator'
|
3
5
|
require 'logger'
|
4
6
|
require 'openssl' rescue nil
|
5
7
|
require 'resolv'
|
@@ -15,14 +17,17 @@ module Instrumental
|
|
15
17
|
EXIT_FLUSH_TIMEOUT = 5
|
16
18
|
HOSTNAME = Socket.gethostbyname(Socket.gethostname).first rescue Socket.gethostname
|
17
19
|
MAX_BUFFER = 5000
|
20
|
+
MAX_AGGREGATOR_SIZE = 5000
|
18
21
|
MAX_RECONNECT_DELAY = 15
|
19
22
|
REPLY_TIMEOUT = 10
|
20
23
|
RESOLUTION_FAILURES_BEFORE_WAITING = 3
|
21
24
|
RESOLUTION_WAIT = 30
|
22
25
|
RESOLVE_TIMEOUT = 1
|
26
|
+
DEFAULT_FREQUENCY = 0
|
27
|
+
VALID_FREQUENCIES = [0, 1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30, 60]
|
23
28
|
|
24
29
|
|
25
|
-
attr_accessor :host, :port, :synchronous, :
|
30
|
+
attr_accessor :host, :port, :synchronous, :frequency, :sender_queue, :aggregator_queue, :dns_resolutions, :last_connect_at
|
26
31
|
attr_reader :connection, :enabled, :secure
|
27
32
|
|
28
33
|
def self.logger=(l)
|
@@ -52,6 +57,7 @@ module Instrumental
|
|
52
57
|
# port: 8001
|
53
58
|
# enabled: true
|
54
59
|
# synchronous: false
|
60
|
+
# frequency: 10
|
55
61
|
# secure: true
|
56
62
|
# verify: true
|
57
63
|
@api_key = api_key
|
@@ -73,13 +79,23 @@ module Instrumental
|
|
73
79
|
@port = (@port || default_port).to_i
|
74
80
|
@enabled = options.has_key?(:enabled) ? !!options[:enabled] : true
|
75
81
|
@synchronous = !!options[:synchronous]
|
82
|
+
|
83
|
+
if options.has_key?(:frequency)
|
84
|
+
self.frequency = options[:frequency]
|
85
|
+
else
|
86
|
+
self.frequency = DEFAULT_FREQUENCY
|
87
|
+
end
|
88
|
+
|
89
|
+
@metrician = options[:metrician].nil? ? true : !!options[:metrician]
|
76
90
|
@pid = Process.pid
|
77
91
|
@allow_reconnect = true
|
78
92
|
@dns_resolutions = 0
|
79
93
|
@last_connect_at = 0
|
80
|
-
|
94
|
+
|
81
95
|
@start_worker_mutex = Mutex.new
|
82
|
-
@
|
96
|
+
@aggregator_queue = Queue.new
|
97
|
+
@sender_queue = Queue.new
|
98
|
+
|
83
99
|
|
84
100
|
setup_cleanup_at_exit if @enabled
|
85
101
|
|
@@ -93,7 +109,9 @@ module Instrumental
|
|
93
109
|
# agent.gauge('load', 1.23)
|
94
110
|
def gauge(metric, value, time = Time.now, count = 1)
|
95
111
|
if valid?(metric, value, time, count) &&
|
96
|
-
|
112
|
+
send_command(Instrumental::Command.new("gauge".freeze, metric, value, time, count))
|
113
|
+
# tempted to "gauge" this to a symbol? Don't. Frozen strings are very fast,
|
114
|
+
# and later we're going to to_s every one of these anyway.
|
97
115
|
value
|
98
116
|
else
|
99
117
|
nil
|
@@ -141,7 +159,7 @@ module Instrumental
|
|
141
159
|
# agent.increment('users')
|
142
160
|
def increment(metric, value = 1, time = Time.now, count = 1)
|
143
161
|
if valid?(metric, value, time, count) &&
|
144
|
-
|
162
|
+
send_command(Instrumental::Command.new("increment".freeze, metric, value, time, count))
|
145
163
|
value
|
146
164
|
else
|
147
165
|
nil
|
@@ -156,7 +174,7 @@ module Instrumental
|
|
156
174
|
# agent.notice('A notice')
|
157
175
|
def notice(note, time = Time.now, duration = 0)
|
158
176
|
if valid_note?(note)
|
159
|
-
send_command(
|
177
|
+
send_command(Instrumental::Notice.new(note, time, duration))
|
160
178
|
note
|
161
179
|
else
|
162
180
|
nil
|
@@ -195,6 +213,22 @@ module Instrumental
|
|
195
213
|
@logger || self.class.logger
|
196
214
|
end
|
197
215
|
|
216
|
+
# Sets the aggregation frequency, in seconds.
#
# The argument is coerced with #to_i. A value that is not listed in
# VALID_FREQUENCIES triggers a warning and is rounded down to the largest
# valid frequency below it (0 when there is none, e.g. for negative input).
#
# When the agent is synchronous the stored frequency is always 0. The
# conflict warning is only logged when a non-zero frequency was actually
# requested, so a synchronous agent using the default frequency stays quiet.
def frequency=(frequency)
  freq = frequency.to_i
  unless VALID_FREQUENCIES.include?(freq)
    logger.warn "Frequency must be a value that divides evenly into 60: 1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30, or 60."
    # An empty selection gives nil from #max, and nil.to_i is 0.
    freq = VALID_FREQUENCIES.select { |f| f < freq }.max.to_i
  end

  if @synchronous
    logger.warn "Synchronous and Frequency should not be enabled at the same time! Defaulting to synchronous mode." if freq > 0
    freq = 0
  end

  @frequency = freq
end
|
231
|
+
|
198
232
|
# Stopping the agent will immediately stop all communication
|
199
233
|
# to Instrumental. If you call this and submit another metric,
|
200
234
|
# the agent will start again.
|
@@ -206,12 +240,19 @@ module Instrumental
|
|
206
240
|
#
|
207
241
|
def stop
|
208
242
|
disconnect
|
209
|
-
if @
|
210
|
-
@
|
211
|
-
@
|
243
|
+
if @sender_thread
|
244
|
+
@sender_thread.kill
|
245
|
+
@sender_thread = nil
|
246
|
+
end
|
247
|
+
if @aggregator_thread
|
248
|
+
@aggregator_thread.kill
|
249
|
+
@aggregator_thread = nil
|
212
250
|
end
|
213
|
-
if @
|
214
|
-
@
|
251
|
+
if @sender_queue
|
252
|
+
@sender_queue.clear
|
253
|
+
end
|
254
|
+
if @aggregator_queue
|
255
|
+
@aggregator_queue.clear
|
215
256
|
end
|
216
257
|
end
|
217
258
|
|
@@ -221,15 +262,22 @@ module Instrumental
|
|
221
262
|
# where at_exit is bypassed like Resque workers.
|
222
263
|
def cleanup
|
223
264
|
if running?
|
224
|
-
logger.info "Cleaning up agent,
|
265
|
+
logger.info "Cleaning up agent, aggregator_size: #{@aggregator_queue.size}, thread_running: #{@aggregator_thread.alive?}"
|
266
|
+
logger.info "Cleaning up agent, queue size: #{@sender_queue.size}, thread running: #{@sender_thread.alive?}"
|
225
267
|
@allow_reconnect = false
|
226
|
-
if @
|
227
|
-
|
268
|
+
if @sender_queue.size > 0 || @aggregator_queue.size > 0
|
269
|
+
@sender_queue << ['exit']
|
270
|
+
@aggregator_queue << ['exit']
|
228
271
|
begin
|
229
|
-
with_timeout(EXIT_FLUSH_TIMEOUT) { @
|
272
|
+
with_timeout(EXIT_FLUSH_TIMEOUT) { @aggregator_thread.join }
|
273
|
+
with_timeout(EXIT_FLUSH_TIMEOUT) { @sender_thread.join }
|
230
274
|
rescue Timeout::Error
|
231
|
-
|
232
|
-
|
275
|
+
total_size = @sender_queue&.size.to_i +
|
276
|
+
@aggregator_queue&.size.to_i +
|
277
|
+
@event_aggregator&.size.to_i
|
278
|
+
|
279
|
+
if total_size > 0
|
280
|
+
logger.error "Timed out working agent thread on exit, dropping #{total_size} metrics"
|
233
281
|
else
|
234
282
|
logger.error "Timed out Instrumental Agent, exiting"
|
235
283
|
end
|
@@ -270,6 +318,7 @@ module Instrumental
|
|
270
318
|
end
|
271
319
|
|
272
320
|
def report_exception(e)
|
321
|
+
# puts "--- Exception of type #{e.class} occurred:\n#{e.message}\n#{e.backtrace.join("\n")}"
|
273
322
|
logger.error "Exception of type #{e.class} occurred:\n#{e.message}\n#{e.backtrace.join("\n")}"
|
274
323
|
end
|
275
324
|
|
@@ -290,44 +339,41 @@ module Instrumental
|
|
290
339
|
nil
|
291
340
|
end
|
292
341
|
|
293
|
-
def send_command(
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
queue_message(cmd, { :synchronous => @synchronous })
|
302
|
-
else
|
303
|
-
if !@queue_full_warning
|
304
|
-
@queue_full_warning = true
|
305
|
-
logger.warn "Queue full(#{@queue.size}), dropping commands..."
|
306
|
-
end
|
307
|
-
logger.debug "Dropping command, queue full(#{@queue.size}): #{cmd.chomp}"
|
308
|
-
nil
|
309
|
-
end
|
342
|
+
def send_command(command)
|
343
|
+
return logger.debug(command.to_s) unless enabled?
|
344
|
+
start_workers
|
345
|
+
critical_queue = frequency.to_i == 0 ? @sender_queue : @aggregator_queue
|
346
|
+
if critical_queue && critical_queue.size < MAX_BUFFER
|
347
|
+
@queue_full_warning = false
|
348
|
+
logger.debug "Queueing: #{command.to_s}"
|
349
|
+
queue_message(command, { :synchronous => @synchronous })
|
310
350
|
else
|
311
|
-
|
351
|
+
if !@queue_full_warning
|
352
|
+
@queue_full_warning = true
|
353
|
+
logger.warn "Queue full(#{critical_queue.size}), dropping commands..."
|
354
|
+
end
|
355
|
+
logger.debug "Dropping command, queue full(#{critical_queue.size}): #{command.to_s}"
|
356
|
+
nil
|
312
357
|
end
|
313
358
|
end
|
314
359
|
|
315
360
|
def queue_message(message, options = {})
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
@
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
361
|
+
return message unless enabled?
|
362
|
+
|
363
|
+
# imagine it's a reverse merge, but with fewer allocations
|
364
|
+
options[:allow_reconnect] = @allow_reconnect unless options.has_key?(:allow_reconnect)
|
365
|
+
|
366
|
+
if options.delete(:synchronous)
|
367
|
+
options[:sync_resource] ||= ConditionVariable.new
|
368
|
+
@sync_mutex.synchronize {
|
369
|
+
queue = message == "flush" ? @aggregator_queue : @sender_queue
|
370
|
+
queue << [message, options]
|
371
|
+
options[:sync_resource].wait(@sync_mutex)
|
372
|
+
}
|
373
|
+
elsif frequency.to_i == 0
|
374
|
+
@sender_queue << [message, options]
|
375
|
+
else
|
376
|
+
@aggregator_queue << [message, options]
|
331
377
|
end
|
332
378
|
message
|
333
379
|
end
|
@@ -355,9 +401,9 @@ module Instrumental
|
|
355
401
|
end
|
356
402
|
end
|
357
403
|
|
358
|
-
def
|
404
|
+
def start_workers
|
359
405
|
# NOTE: We need a mutex around both `running?` and thread creation,
|
360
|
-
# otherwise we could create
|
406
|
+
# otherwise we could create too many threads.
|
361
407
|
# Return early and queue the message if another thread is
|
362
408
|
# starting the worker.
|
363
409
|
return if !@start_worker_mutex.try_lock
|
@@ -371,9 +417,19 @@ module Instrumental
|
|
371
417
|
@sync_mutex = Mutex.new
|
372
418
|
@failures = 0
|
373
419
|
@sockaddr_in = Socket.pack_sockaddr_in(@port, address)
|
374
|
-
|
375
|
-
|
376
|
-
|
420
|
+
|
421
|
+
logger.info "Starting aggregator thread"
|
422
|
+
if !@aggregator_thread&.alive?
|
423
|
+
@aggregator_thread = Thread.new do
|
424
|
+
run_aggregator_loop
|
425
|
+
end
|
426
|
+
end
|
427
|
+
|
428
|
+
if !@sender_thread&.alive?
|
429
|
+
logger.info "Starting sender thread"
|
430
|
+
@sender_thread = Thread.new do
|
431
|
+
run_sender_loop
|
432
|
+
end
|
377
433
|
end
|
378
434
|
end
|
379
435
|
ensure
|
@@ -409,12 +465,73 @@ module Instrumental
|
|
409
465
|
sock
|
410
466
|
end
|
411
467
|
|
412
|
-
def
|
468
|
+
# Body of the aggregator thread.
#
# Repeatedly takes one entry from @aggregator_queue (or synthesises a
# 'forward' command when the wait times out or the aggregator has grown past
# MAX_AGGREGATOR_SIZE) and dispatches on it:
#
# * 'exit'    - log and return true, ending the loop.
# * 'flush'   - hand the current aggregator (if any) to @sender_queue, then
#               pass the flush command itself on to the sender.
# * 'forward' - hand the current aggregator (if any) to @sender_queue, unless
#               the sender queue already has entries and no thread is
#               waiting on it.
# * Notice    - passed straight through to @sender_queue, never aggregated.
# * otherwise - added to the current EventAggregator, creating one on demand.
#
# Any exception raised inside the loop is passed to report_exception.
def run_aggregator_loop
  # if the sender queue is some level of full, should we keep aggregating until it empties out?
  # what does this mean for aggregation slices - aggregating to nearest frequency will
  # make the object needlessly larger, when minute resolution is what we have on the server
  begin
    loop do
      now = Time.now.to_i
      # Wait until the next wall-clock multiple of `frequency` seconds.
      # NOTE(review): a wait of 0 is presumably treated by with_timeout as
      # "no timeout" (as Timeout.timeout does) - confirm against with_timeout.
      time_to_wait = if frequency == 0
        0
      else
        next_frequency = (now - (now % frequency)) + frequency
        [(next_frequency - Time.now.to_f), 0].max
      end

      command_and_args, command_options = if @event_aggregator&.size.to_i > MAX_AGGREGATOR_SIZE
        logger.info "Aggregator full, flushing early with #{MAX_AGGREGATOR_SIZE} metrics."
        ['forward', {}]
      else
        begin
          with_timeout(time_to_wait) do
            @aggregator_queue.pop
          end
        rescue Timeout::Error
          # Nothing arrived before the slice ended: forward what we have.
          ['forward', {}]
        end
      end

      if command_and_args
        case command_and_args
        when 'exit'
          logger.info "Exiting, #{@aggregator_queue.size} commands remain"
          return true
        when 'flush'
          if !@event_aggregator.nil?
            @sender_queue << @event_aggregator
            @event_aggregator = nil
          end
          @sender_queue << ['flush', command_options]
        when 'forward'
          if !@event_aggregator.nil?
            # Sender still has pending entries and is not idle: keep aggregating.
            next if @sender_queue.size > 0 && @sender_queue.num_waiting < 1
            @sender_queue << @event_aggregator
            @event_aggregator = nil
          end
        when Notice
          @sender_queue << [command_and_args, command_options]
        else
          @event_aggregator = EventAggregator.new(frequency: @frequency) if @event_aggregator.nil?

          logger.debug "Sending: #{command_and_args} to aggregator"
          @event_aggregator.put(command_and_args)
        end
      end
    end
  rescue Exception => err
    report_exception(err)
  end
end
|
528
|
+
|
529
|
+
def run_sender_loop
|
413
530
|
@failures = 0
|
414
531
|
begin
|
415
|
-
|
416
|
-
|
417
|
-
|
532
|
+
logger.info "connecting to collector"
|
533
|
+
command_and_args = nil
|
534
|
+
command_options = nil
|
418
535
|
with_timeout(CONNECT_TIMEOUT) do
|
419
536
|
@socket = open_socket(@sockaddr_in, @secure, @verify_cert)
|
420
537
|
end
|
@@ -431,18 +548,23 @@ module Instrumental
|
|
431
548
|
send_with_reply_timeout "authenticate #{@api_key}"
|
432
549
|
|
433
550
|
loop do
|
434
|
-
command_and_args, command_options = @
|
551
|
+
command_and_args, command_options = @sender_queue.pop
|
435
552
|
if command_and_args
|
436
553
|
sync_resource = command_options && command_options[:sync_resource]
|
437
554
|
test_connection
|
438
555
|
case command_and_args
|
439
556
|
when 'exit'
|
440
|
-
logger.info "Exiting, #{@
|
557
|
+
logger.info "Exiting, #{@sender_queue.size} commands remain"
|
441
558
|
return true
|
442
559
|
when 'flush'
|
443
560
|
release_resource = true
|
561
|
+
when EventAggregator
|
562
|
+
command_and_args.values.values.each do |command|
|
563
|
+
logger.debug "Sending: #{command}"
|
564
|
+
@socket.puts command
|
565
|
+
end
|
444
566
|
else
|
445
|
-
logger.debug "Sending: #{command_and_args
|
567
|
+
logger.debug "Sending: #{command_and_args}"
|
446
568
|
@socket.puts command_and_args
|
447
569
|
end
|
448
570
|
command_and_args = nil
|
@@ -464,7 +586,7 @@ module Instrumental
|
|
464
586
|
# or we cannot reach the server
|
465
587
|
# or the connection state of this socket is in a race
|
466
588
|
# or SSL is not functioning properly for some reason
|
467
|
-
logger.error "unable to connect to Instrumental, hanging up with #{@
|
589
|
+
logger.error "unable to connect to Instrumental, hanging up with #{@sender_queue.size} messages remaining"
|
468
590
|
logger.debug "Exception: #{err.inspect}\n#{err.backtrace.join("\n")}"
|
469
591
|
allow_reconnect = false
|
470
592
|
else
|
@@ -478,7 +600,7 @@ module Instrumental
|
|
478
600
|
end
|
479
601
|
if command_and_args
|
480
602
|
logger.debug "requeueing: #{command_and_args}"
|
481
|
-
@
|
603
|
+
@sender_queue << command_and_args
|
482
604
|
end
|
483
605
|
disconnect
|
484
606
|
@failures += 1
|
@@ -498,7 +620,11 @@ module Instrumental
|
|
498
620
|
end
|
499
621
|
|
500
622
|
def running?
|
501
|
-
!@
|
623
|
+
!@sender_thread.nil? &&
|
624
|
+
!@aggregator_thread.nil? &&
|
625
|
+
@pid == Process.pid &&
|
626
|
+
@sender_thread.alive? &&
|
627
|
+
@aggregator_thread.alive?
|
502
628
|
end
|
503
629
|
|
504
630
|
def flush_socket(socket)
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Instrumental
  # Command names used for metric commands.
  METRIC_TYPES = ["increment".freeze, "gauge".freeze].freeze

  # A single metric command. `time` and `count` are coerced to integers on
  # construction; `command`, `metric` and `value` are stored as given.
  Command = Struct.new(:command, :metric, :value, :time, :count) do
    def initialize(command, metric, value, time, count)
      super(command, metric, value, time.to_i, count.to_i)
    end

    # Space-separated representation: "command metric value time count".
    # Built with one interpolation to avoid intermediate arrays.
    def to_s
      "#{command} #{metric} #{value} #{time} #{count}"
    end

    # Frozen "metric:time" string identifying this metric at this timestamp.
    def metadata
      "#{metric}:#{time}".freeze
    end

    # Combines two commands by summing their values and counts. The command
    # name, metric and time are taken from the receiver. Returns the receiver
    # itself when other_command is nil.
    def +(other_command)
      return self if other_command.nil?
      Command.new(command, metric, value + other_command.value, time, count + other_command.count)
    end
  end

  # A notice. `time` and `duration` are coerced to integers on construction.
  Notice = Struct.new(:note, :time, :duration) do
    def initialize(note, time, duration)
      super(note, time.to_i, duration.to_i)
    end

    # Space-separated representation: "notice time duration note".
    def to_s
      "notice #{time} #{duration} #{note}"
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Collects commands into a hash keyed by the command's metadata string,
# merging commands that share a key with `+`.
class EventAggregator
  # values maps a command's metadata string to the merged command for it.
  # counts and received_at are exposed but never assigned by this class.
  attr_accessor :counts, :values, :received_at, :frequency

  # frequency: bucket width in seconds used to align command timestamps.
  def initialize(frequency:)
    @values = Hash.new
    @frequency = frequency
  end

  # Aligns command.time down to the start of its frequency bucket (this
  # mutates the command), then merges the command into the entry stored under
  # its metadata key. Returns the merged entry.
  def put(command)
    command.time = coerce_time(command.time)
    metadata = command.metadata
    @values[metadata] = (command + @values[metadata])
  end

  # Number of distinct metadata keys currently held.
  def size
    @values.size
  end

  # Rounds time (anything responding to #to_i) down to a multiple of
  # frequency. When frequency is nil or not positive there is no bucket to
  # align to, so the integer time is returned unchanged instead of raising
  # ZeroDivisionError.
  def coerce_time(time)
    itime = time.to_i
    return itime unless frequency.to_i > 0
    (itime - (itime % frequency)).to_i
  end
end
|
data/lib/instrumental/version.rb
CHANGED
data/spec/agent_spec.rb
CHANGED
@@ -39,7 +39,8 @@ shared_examples "Instrumental Agent" do
|
|
39
39
|
let(:token) { 'test_token' }
|
40
40
|
let(:address) { server.host_and_port }
|
41
41
|
let(:metrician) { false }
|
42
|
-
let(:
|
42
|
+
let(:frequency) { 0 }
|
43
|
+
let(:agent) { Instrumental::Agent.new(token, :collector => address, :synchronous => synchronous, :enabled => enabled, :secure => secure?, :verify_cert => verify_cert?, :metrician => metrician, :frequency => frequency) }
|
43
44
|
|
44
45
|
# Server options
|
45
46
|
let(:listen) { true }
|
@@ -47,6 +48,12 @@ shared_examples "Instrumental Agent" do
|
|
47
48
|
let(:authenticate) { true }
|
48
49
|
let(:server) { TestServer.new(:listen => listen, :authenticate => authenticate, :response => response, :secure => secure?) }
|
49
50
|
|
51
|
+
# Time Travel Options
|
52
|
+
let(:start_of_minute) do
|
53
|
+
now = Time.now.to_i
|
54
|
+
Time.at(now - (now % 60))
|
55
|
+
end
|
56
|
+
|
50
57
|
before do
|
51
58
|
Instrumental::Agent.logger.level = Logger::UNKNOWN
|
52
59
|
@server = server
|
@@ -233,11 +240,11 @@ shared_examples "Instrumental Agent" do
|
|
233
240
|
end
|
234
241
|
|
235
242
|
wait
|
236
|
-
expect(agent.
|
237
|
-
expect(agent.
|
238
|
-
expect(agent.
|
239
|
-
expect(agent.
|
240
|
-
expect(agent.
|
243
|
+
expect(agent.sender_queue.size).to eq(3)
|
244
|
+
expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 1 300 1")
|
245
|
+
expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 2 300 1")
|
246
|
+
expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 3 300 1")
|
247
|
+
expect(agent.sender_queue.size).to eq(0)
|
241
248
|
end
|
242
249
|
end
|
243
250
|
end
|
@@ -248,7 +255,7 @@ shared_examples "Instrumental Agent" do
|
|
248
255
|
5.times do |i|
|
249
256
|
agent.increment('overflow_test', i + 1, 300)
|
250
257
|
end
|
251
|
-
expect(agent.instance_variable_get(:@
|
258
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
252
259
|
wait # let the server receive the commands
|
253
260
|
expect(server.commands).to include("increment overflow_test 1 300 1")
|
254
261
|
expect(server.commands).to include("increment overflow_test 2 300 1")
|
@@ -264,8 +271,10 @@ shared_examples "Instrumental Agent" do
|
|
264
271
|
fork do
|
265
272
|
agent.increment('fork_reconnect_test', 1, 3) # triggers reconnect
|
266
273
|
end
|
274
|
+
|
267
275
|
wait(1)
|
268
276
|
agent.increment('fork_reconnect_test', 1, 4) # triggers reconnect
|
277
|
+
|
269
278
|
wait(1)
|
270
279
|
expect(server.connect_count).to eq(2)
|
271
280
|
|
@@ -281,17 +290,17 @@ shared_examples "Instrumental Agent" do
|
|
281
290
|
sleep 1
|
282
291
|
}
|
283
292
|
|
284
|
-
|
285
|
-
allow(agent).to receive(:
|
286
|
-
|
293
|
+
run_sender_loop_calls = 0
|
294
|
+
allow(agent).to receive(:run_sender_loop) {
|
295
|
+
run_sender_loop_calls += 1
|
287
296
|
sleep 3 # keep the worker thread alive
|
288
297
|
}
|
289
298
|
|
290
299
|
t = Thread.new { agent.increment("race") }
|
291
300
|
agent.increment("race")
|
292
301
|
wait(2)
|
293
|
-
expect(
|
294
|
-
expect(agent.
|
302
|
+
expect(run_sender_loop_calls).to eq(1)
|
303
|
+
expect(agent.sender_queue.size).to eq(2)
|
295
304
|
end
|
296
305
|
|
297
306
|
it "should never let an exception reach the user" do
|
@@ -314,14 +323,6 @@ shared_examples "Instrumental Agent" do
|
|
314
323
|
expect(agent.increment("test")).to eq(nil)
|
315
324
|
end
|
316
325
|
|
317
|
-
it "should track invalid metrics" do
|
318
|
-
expect(agent.logger).to receive(:warn).with(/%%/)
|
319
|
-
agent.increment(' %% .!#@$%^&*', 1, 1)
|
320
|
-
wait do
|
321
|
-
expect(server.commands.join("\n")).to include("increment agent.invalid_metric")
|
322
|
-
end
|
323
|
-
end
|
324
|
-
|
325
326
|
it "should allow reasonable metric names" do
|
326
327
|
agent.increment('a')
|
327
328
|
agent.increment('a.b')
|
@@ -399,9 +400,9 @@ shared_examples "Instrumental Agent" do
|
|
399
400
|
|
400
401
|
it "should allow flushing pending values to the server" do
|
401
402
|
1.upto(100) { agent.gauge('a', rand(50)) }
|
402
|
-
expect(agent.instance_variable_get(:@
|
403
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to be > 0
|
403
404
|
agent.flush
|
404
|
-
expect(agent.instance_variable_get(:@
|
405
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
405
406
|
wait do
|
406
407
|
expect(server.commands.grep(/^gauge a /).size).to eq(100)
|
407
408
|
end
|
@@ -439,7 +440,7 @@ shared_examples "Instrumental Agent" do
|
|
439
440
|
agent.increment('reconnect_test', 1, 1234)
|
440
441
|
wait
|
441
442
|
# The agent should not have sent the metric yet, the server is not responding
|
442
|
-
expect(agent.
|
443
|
+
expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
|
443
444
|
end
|
444
445
|
|
445
446
|
it "should warn once when buffer is full" do
|
@@ -474,7 +475,7 @@ shared_examples "Instrumental Agent" do
|
|
474
475
|
agent.increment('reconnect_test', 1, 1234)
|
475
476
|
wait
|
476
477
|
# Since server hasn't responded to hello or authenticate, worker thread will not send data
|
477
|
-
expect(agent.
|
478
|
+
expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
|
478
479
|
end
|
479
480
|
end
|
480
481
|
|
@@ -495,7 +496,7 @@ shared_examples "Instrumental Agent" do
|
|
495
496
|
wait do
|
496
497
|
expect(agent.send(:running?)).to eq(false)
|
497
498
|
end
|
498
|
-
expect(agent.
|
499
|
+
expect(agent.sender_queue.size).to eq(1)
|
499
500
|
end
|
500
501
|
|
501
502
|
it "should restart the worker thread after hanging it up during an unreachable host event" do
|
@@ -514,7 +515,7 @@ shared_examples "Instrumental Agent" do
|
|
514
515
|
wait do
|
515
516
|
expect(agent.send(:running?)).to eq(false)
|
516
517
|
end
|
517
|
-
expect(agent.
|
518
|
+
expect(agent.sender_queue.size).to eq(1)
|
518
519
|
# Start the server back up again
|
519
520
|
server.listen
|
520
521
|
# Sending another metric should kickstart the background worker thread
|
@@ -522,7 +523,7 @@ shared_examples "Instrumental Agent" do
|
|
522
523
|
# The agent should now be running the background thread, and the queue should be empty
|
523
524
|
wait do
|
524
525
|
expect(agent.send(:running?)).to eq(true)
|
525
|
-
expect(agent.
|
526
|
+
expect(agent.sender_queue.size).to eq(0)
|
526
527
|
end
|
527
528
|
end
|
528
529
|
|
@@ -547,7 +548,7 @@ shared_examples "Instrumental Agent" do
|
|
547
548
|
expect(agent.send(:running?)).to eq(false)
|
548
549
|
end
|
549
550
|
# The command is not in the queue
|
550
|
-
expect(agent.
|
551
|
+
expect(agent.sender_queue.size).to eq(0)
|
551
552
|
# allow the agent to behave normally
|
552
553
|
test_connection_fail = false
|
553
554
|
# Sending another metric should kickstart the background worker thread
|
@@ -555,7 +556,7 @@ shared_examples "Instrumental Agent" do
|
|
555
556
|
# The agent should now be running the background thread, and the queue should be empty
|
556
557
|
wait do
|
557
558
|
expect(agent.send(:running?)).to eq(true)
|
558
|
-
expect(agent.
|
559
|
+
expect(agent.sender_queue.size).to eq(0)
|
559
560
|
expect(server.commands.grep(/connection_failure/).size).to eq(2)
|
560
561
|
end
|
561
562
|
end
|
@@ -580,7 +581,7 @@ shared_examples "Instrumental Agent" do
|
|
580
581
|
agent.gauge('connection_failure_3', 1, 1234)
|
581
582
|
wait do
|
582
583
|
expect(agent.instance_variable_get(:@failures)).to be > 0
|
583
|
-
expect(agent.
|
584
|
+
expect(agent.sender_queue.size).to be > 0
|
584
585
|
end
|
585
586
|
|
586
587
|
# let the loop proceed
|
@@ -588,12 +589,11 @@ shared_examples "Instrumental Agent" do
|
|
588
589
|
|
589
590
|
wait do
|
590
591
|
expect(agent.send(:running?)).to eq(true)
|
591
|
-
expect(agent.
|
592
|
+
expect(agent.sender_queue.size).to eq(0)
|
592
593
|
end
|
593
594
|
end
|
594
595
|
end
|
595
596
|
|
596
|
-
|
597
597
|
context 'not authenticating' do
|
598
598
|
# Server will fail all authentication attempts
|
599
599
|
let(:authenticate) { false }
|
@@ -602,7 +602,7 @@ shared_examples "Instrumental Agent" do
|
|
602
602
|
agent.increment('reconnect_test', 1, 1234)
|
603
603
|
wait
|
604
604
|
# Metrics should not have been sent since all authentication failed
|
605
|
-
expect(agent.
|
605
|
+
expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
|
606
606
|
end
|
607
607
|
end
|
608
608
|
|
@@ -639,7 +639,7 @@ shared_examples "Instrumental Agent" do
|
|
639
639
|
it "should not wait to exit a process if there are no commands queued" do
|
640
640
|
allow(agent).to receive(:open_socket) { |*args, &block| sleep(5) && block.call }
|
641
641
|
with_constants('Instrumental::Agent::EXIT_FLUSH_TIMEOUT' => 3) do
|
642
|
-
if (pid = fork { agent.increment('foo', 1); agent.
|
642
|
+
if (pid = fork { agent.increment('foo', 1); agent.sender_queue.clear })
|
643
643
|
tm = Time.now.to_f
|
644
644
|
Process.wait(pid)
|
645
645
|
diff = Time.now.to_f - tm
|
@@ -725,7 +725,7 @@ shared_examples "Instrumental Agent" do
|
|
725
725
|
expect(agent.send(:running?)).to eq(true)
|
726
726
|
|
727
727
|
# Setup a failure for the next command so we'll break out of the inner
|
728
|
-
# loop in
|
728
|
+
# loop in run_sender_loop causing another call to open_socket
|
729
729
|
test_connection_fail = true
|
730
730
|
tc = agent.method(:test_connection)
|
731
731
|
allow(agent).to receive(:test_connection) { |*args, &block| test_connection_fail ? raise("fail") : tc.call(*args) }
|
@@ -802,6 +802,303 @@ shared_examples "Instrumental Agent" do
|
|
802
802
|
end
|
803
803
|
end
|
804
804
|
end
|
805
|
+
|
806
|
+
describe Instrumental::Agent, "aggregation" do
|
807
|
+
context "aggregation enabled" do
|
808
|
+
let(:frequency) { 2 }
|
809
|
+
|
810
|
+
it "can be enabled at Agent.new time" do
|
811
|
+
expect(agent.frequency).to eq(2)
|
812
|
+
end
|
813
|
+
|
814
|
+
it "can be modified by setting the agent frequency" do
|
815
|
+
agent.frequency = 15
|
816
|
+
expect(agent.frequency).to eq(15)
|
817
|
+
end
|
818
|
+
|
819
|
+
it "is disabled by default" do
|
820
|
+
agent = Instrumental::Agent.new('test_token')
|
821
|
+
expect(agent.frequency.to_f).to eq(0)
|
822
|
+
end
|
823
|
+
|
824
|
+
it "should only allow frequencies that align with minutes" do
|
825
|
+
(-5..100).each do |freq|
|
826
|
+
agent.frequency = freq
|
827
|
+
expect(Instrumental::Agent::VALID_FREQUENCIES).to include(agent.frequency)
|
828
|
+
end
|
829
|
+
end
|
830
|
+
|
831
|
+
it "bypasses aggregator queue entirely for most commands when frequency == 0" do
|
832
|
+
agent.frequency = 0 # this is red - 0 for green
|
833
|
+
expect(EventAggregator).not_to receive(:new)
|
834
|
+
agent.increment('a_metric')
|
835
|
+
end
|
836
|
+
|
837
|
+
it "adds data to the event aggregator and does not immediately send it" do
|
838
|
+
Timecop.travel start_of_minute
|
839
|
+
agent.increment('test')
|
840
|
+
wait do
|
841
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(1)
|
842
|
+
expect(agent.instance_variable_get(:@event_aggregator).values.values.first.metric).to eq('test')
|
843
|
+
end
|
844
|
+
end
|
845
|
+
|
846
|
+
it "batches data before sending" do
|
847
|
+
Timecop.freeze do
|
848
|
+
agent.increment('a_metric')
|
849
|
+
agent.increment('a_metric')
|
850
|
+
agent.increment('another_metric')
|
851
|
+
end
|
852
|
+
agent.flush(true)
|
853
|
+
wait do
|
854
|
+
expect(server.commands.grep(/_metric/).size).to eq(2)
|
855
|
+
aggregated_metric = server.commands.grep(/a_metric/).first.split(" ")
|
856
|
+
expect(aggregated_metric[2].to_i).to eq(2) # value
|
857
|
+
expect(aggregated_metric[4].to_i).to eq(2) # count
|
858
|
+
end
|
859
|
+
end
|
860
|
+
|
861
|
+
it "aggregates to the specified frequency within the aggregator" do
|
862
|
+
Timecop.travel(start_of_minute)
|
863
|
+
agent.frequency = 15
|
864
|
+
expect(agent.frequency).not_to be(Instrumental::Agent::DEFAULT_FREQUENCY)
|
865
|
+
agent.increment('metric', 1, Time.at(0))
|
866
|
+
|
867
|
+
# will get aligned to the closest frequency (15)
|
868
|
+
agent.increment('metric', 1, Time.at(20))
|
869
|
+
wait do
|
870
|
+
expect(agent.instance_variable_get(:@event_aggregator).values.keys).to eq(["metric:0", "metric:15"])
|
871
|
+
end
|
872
|
+
agent.flush
|
873
|
+
wait do
|
874
|
+
expect(server.commands.grep(/metric 1 0/).size).to eq(1)
|
875
|
+
expect(server.commands.grep(/metric 1 15/).size).to eq(1)
|
876
|
+
end
|
877
|
+
end
|
878
|
+
|
879
|
+
it "flushes data from both queues before sending" do
|
880
|
+
Timecop.freeze do
|
881
|
+
100.times do |i|
|
882
|
+
agent.increment("test_metric_#{i}")
|
883
|
+
agent.increment("other_metric")
|
884
|
+
end
|
885
|
+
end
|
886
|
+
|
887
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to be > 0
|
888
|
+
agent.flush
|
889
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
890
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
891
|
+
|
892
|
+
wait do
|
893
|
+
expect(server.commands.grep(/test_metric/).size).to eq(100)
|
894
|
+
expect(server.commands.grep(/other_metric/).size).to eq(1)
|
895
|
+
end
|
896
|
+
end
|
897
|
+
|
898
|
+
it "does not batch notices" do
|
899
|
+
agent.frequency = 60
|
900
|
+
agent.notice "things are happening", 0, 100
|
901
|
+
agent.notice "things are happening", 0, 100
|
902
|
+
agent.notice "things are happening", 0, 100
|
903
|
+
wait do
|
904
|
+
expect(server.commands.grep(/things are happening/).size).to eq(3)
|
905
|
+
end
|
906
|
+
end
|
907
|
+
|
908
|
+
it "can be disabled by setting frequency to nil" do
|
909
|
+
agent.frequency = nil
|
910
|
+
expect(EventAggregator).not_to receive(:new)
|
911
|
+
agent.increment('metric')
|
912
|
+
wait do
|
913
|
+
expect(server.commands.grep(/metric/).size).to eq(1)
|
914
|
+
end
|
915
|
+
end
|
916
|
+
|
917
|
+
it "can be disabled by setting frequency to 0" do
|
918
|
+
agent.frequency = 0
|
919
|
+
expect(EventAggregator).not_to receive(:new)
|
920
|
+
agent.increment('metric')
|
921
|
+
wait do
|
922
|
+
expect(server.commands.grep(/metric/).size).to eq(1)
|
923
|
+
end
|
924
|
+
end
|
925
|
+
|
926
|
+
it "automatically uses the highest-without-going-over frequency for a bad frequency" do
|
927
|
+
agent.frequency = 17
|
928
|
+
expect(agent.frequency).to eq(15)
|
929
|
+
agent.frequency = 69420
|
930
|
+
expect(agent.frequency).to eq(60)
|
931
|
+
agent.frequency = 0
|
932
|
+
expect(agent.frequency).to eq(0)
|
933
|
+
agent.frequency = -1
|
934
|
+
expect(agent.frequency).to eq(0)
|
935
|
+
end
|
936
|
+
|
937
|
+
it "can take strings as frequency" do
|
938
|
+
agent = Instrumental::Agent.new('test_token', :frequency => "15")
|
939
|
+
expect(agent.frequency).to eq(15)
|
940
|
+
end
|
941
|
+
|
942
|
+
it "should not be enabled at the same time as synchronous" do
|
943
|
+
expect(Instrumental::Agent.logger).to receive(:warn).with(/Synchronous and Frequency should not be enabled at the same time! Defaulting to synchronous mode./)
|
944
|
+
agent = Instrumental::Agent.new('test_token', :synchronous => true, :frequency => 6)
|
945
|
+
expect(agent.synchronous).to eq(true)
|
946
|
+
expect(agent.frequency).to eq(0)
|
947
|
+
end
|
948
|
+
|
949
|
+
it "should use synchronous mode if it is enabled, even if turned on after frequency set at start" do
|
950
|
+
agent.increment('metric')
|
951
|
+
agent.increment('metric')
|
952
|
+
agent.synchronous = true
|
953
|
+
agent.increment('metric')
|
954
|
+
wait do
|
955
|
+
expect(server.commands.grep(/metric 1/).size).to eq(1)
|
956
|
+
end
|
957
|
+
agent.flush
|
958
|
+
wait do
|
959
|
+
expect(server.commands.grep(/metric 1/).size).to eq(1)
|
960
|
+
expect(server.commands.grep(/metric 2/).size).to eq(1)
|
961
|
+
end
|
962
|
+
end
|
963
|
+
|
964
|
+
it "sends aggregated metrics after specified frequency, even if no flush is sent" do
|
965
|
+
agent.frequency = 1
|
966
|
+
Timecop.travel(start_of_minute)
|
967
|
+
agent.increment('metric')
|
968
|
+
agent.increment('metric')
|
969
|
+
agent.gauge('other', 1)
|
970
|
+
agent.gauge('other', 1)
|
971
|
+
agent.gauge('other', 1)
|
972
|
+
sleep (0.5)
|
973
|
+
wait { expect(server.commands.grep(/metric/).size).to eq(0) }
|
974
|
+
sleep (0.51) # total sleep > 1 frequency
|
975
|
+
|
976
|
+
expect(server.commands.grep(/metric 2/).size).to eq(1)
|
977
|
+
expect(server.commands.grep(/other 3/).size).to eq(1)
|
978
|
+
end
|
979
|
+
|
980
|
+
# this test really relies on the worker threads not working unexpectedly
|
981
|
+
it "will overflow if the aggregator queue is full" do
|
982
|
+
Timecop.travel(start_of_minute)
|
983
|
+
with_constants('Instrumental::Agent::MAX_BUFFER' => 3) do
|
984
|
+
allow(agent.logger).to receive(:debug)
|
985
|
+
expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_test 4 300 1")
|
986
|
+
agent.increment('overflow_test', 4, 300, 1)
|
987
|
+
agent.increment('overflow_test', 4, 300, 1)
|
988
|
+
agent.increment('overflow_test', 4, 300, 1)
|
989
|
+
agent.increment('overflow_test', 4, 300, 1)
|
990
|
+
|
991
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(3)
|
992
|
+
agent.flush
|
993
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
994
|
+
end
|
995
|
+
end
|
996
|
+
|
997
|
+
it "if aggregator is at max size, next command will force a forward to the sender thread" do
|
998
|
+
Timecop.travel(start_of_minute)
|
999
|
+
with_constants('Instrumental::Agent::MAX_AGGREGATOR_SIZE' => 3) do
|
1000
|
+
agent.increment('overflow_test1')
|
1001
|
+
agent.increment('overflow_test2')
|
1002
|
+
agent.increment('overflow_test3')
|
1003
|
+
agent.increment('overflow_test4')
|
1004
|
+
agent.increment('overflow_test5')
|
1005
|
+
|
1006
|
+
# only 1 because the 5th command triggers a forward of the first 4
|
1007
|
+
wait do
|
1008
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(1)
|
1009
|
+
end
|
1010
|
+
agent.flush
|
1011
|
+
wait do
|
1012
|
+
expect(server.commands.grep(/overflow_test/).size).to eq(5)
|
1013
|
+
end
|
1014
|
+
end
|
1015
|
+
end
|
1016
|
+
|
1017
|
+
context do
|
1018
|
+
let(:listen) { false }
|
1019
|
+
it "will not send aggregators to the sender queue if the sender thread is not ready" do
|
1020
|
+
Timecop.travel(start_of_minute)
|
1021
|
+
agent.frequency = 1
|
1022
|
+
|
1023
|
+
with_constants('Instrumental::Agent::MAX_BUFFER' => 3,
|
1024
|
+
'Instrumental::Agent::MAX_AGGREGATOR_SIZE' => 4) do
|
1025
|
+
|
1026
|
+
# fill the queue
|
1027
|
+
agent.increment('overflow_test1')
|
1028
|
+
agent.increment('overflow_test2')
|
1029
|
+
agent.increment('overflow_test3')
|
1030
|
+
|
1031
|
+
# wait until they are all in the aggregator
|
1032
|
+
wait do
|
1033
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
1034
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
|
1035
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
1036
|
+
end
|
1037
|
+
|
1038
|
+
# fill the queue again
|
1039
|
+
agent.increment('overflow_test1')
|
1040
|
+
agent.increment('overflow_test2')
|
1041
|
+
agent.increment('overflow_test3')
|
1042
|
+
|
1043
|
+
# wait until they are all in the aggregator
|
1044
|
+
wait do
|
1045
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
1046
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
|
1047
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
1048
|
+
end
|
1049
|
+
|
1050
|
+
# wait for the aggregator to get forwarded and popped by the sender
|
1051
|
+
wait do
|
1052
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
1053
|
+
expect(agent.instance_variable_get(:@event_aggregator)).to eq(nil)
|
1054
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
|
1055
|
+
end
|
1056
|
+
|
1057
|
+
# fill the queue again
|
1058
|
+
agent.increment('overflow_test4')
|
1059
|
+
agent.increment('overflow_test5')
|
1060
|
+
agent.increment('overflow_test6')
|
1061
|
+
|
1062
|
+
# wait for them all to be in the aggregator
|
1063
|
+
wait do
|
1064
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
1065
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
|
1066
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
|
1067
|
+
end
|
1068
|
+
|
1069
|
+
# sleep until the next forward is done
|
1070
|
+
sleep(agent.frequency + 0.1)
|
1071
|
+
|
1072
|
+
# fill the queue again
|
1073
|
+
agent.increment('overflow_test7')
|
1074
|
+
agent.increment('overflow_test8')
|
1075
|
+
agent.increment('overflow_test9')
|
1076
|
+
|
1077
|
+
# because sending is blocked, the previous aggregator never sent
|
1078
|
+
# when it hits max size, the aggregator queue starts backing up
|
1079
|
+
wait do
|
1080
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(1)
|
1081
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(5)
|
1082
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
|
1083
|
+
end
|
1084
|
+
|
1085
|
+
# send 3 more items, to overflow the aggregator queue
|
1086
|
+
allow(agent.logger).to receive(:debug)
|
1087
|
+
expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_testc 4 300 1")
|
1088
|
+
agent.increment('overflow_testa')
|
1089
|
+
agent.increment('overflow_testb')
|
1090
|
+
agent.increment('overflow_testc', 4, 300, 1) # will get dropped
|
1091
|
+
|
1092
|
+
wait do
|
1093
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(3)
|
1094
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(5)
|
1095
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
|
1096
|
+
end
|
1097
|
+
end
|
1098
|
+
end
|
1099
|
+
end
|
1100
|
+
end
|
1101
|
+
end
|
805
1102
|
end
|
806
1103
|
end
|
807
1104
|
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
|
4
|
+
describe Instrumental::Command, "basic functions of command structs" do
|
5
|
+
it "should not allow bad arguments to command#+" do
|
6
|
+
command = Instrumental::Command.new("gauge", "abc", 1, Time.at(0), 1)
|
7
|
+
|
8
|
+
# nil is a no-op
|
9
|
+
expect(command + nil).to eq(command)
|
10
|
+
# it will change the type of the other command
|
11
|
+
expect(command + Instrumental::Command.new("increment", "abc", 1, Time.at(0), 1))
|
12
|
+
.to eq(Instrumental::Command.new("gauge", "abc", 2, Time.at(0), 2))
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should add together with like commands" do
|
16
|
+
command = Instrumental::Command.new("gauge", "abc", 1, Time.at(0), 1)
|
17
|
+
other = Instrumental::Command.new("gauge", "abc", 2, Time.at(0), 4)
|
18
|
+
expect(command + other).to eq(Instrumental::Command.new("gauge", "abc", 3, Time.at(0), 5))
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe EventAggregator, "time and frequency operations" do
|
4
|
+
it "should massage time values to match the start of a window" do
|
5
|
+
agg = EventAggregator.new(frequency: 10)
|
6
|
+
Timecop.freeze do
|
7
|
+
start_of_minute = Time.now.to_i - (Time.now.to_i % 60)
|
8
|
+
times_to_report = [start_of_minute + 5, start_of_minute + 15]
|
9
|
+
|
10
|
+
times_to_report.each do |at_time|
|
11
|
+
agg.put(Instrumental::Command.new("gauge", "abc", 5, Time.at(at_time), 1))
|
12
|
+
end
|
13
|
+
|
14
|
+
expect(agg.size).to eq(2)
|
15
|
+
|
16
|
+
expected_values = [Instrumental::Command.new("gauge", "abc", 5, Time.at(start_of_minute), 1),
|
17
|
+
Instrumental::Command.new("gauge", "abc", 5, Time.at(start_of_minute + 10), 1)]
|
18
|
+
expect(agg.values.values).to eq(expected_values)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
describe EventAggregator do
|
24
|
+
it "should aggregate put operations to a given frequency" do
|
25
|
+
start_of_minute = Time.now.to_i - (Time.now.to_i % 60)
|
26
|
+
Timecop.freeze(Time.at(start_of_minute)) do
|
27
|
+
agg = EventAggregator.new(frequency: 30)
|
28
|
+
(Time.now.to_i..(Time.now.to_i + 119)).each do |time|
|
29
|
+
agg.put(Instrumental::Command.new("increment", "abc", 1, time, 1))
|
30
|
+
end
|
31
|
+
expect(agg.size).to eq(4)
|
32
|
+
(Time.now.to_i..(Time.now.to_i + 119)).step(30).map do |time|
|
33
|
+
expect(agg.values["abc:#{time}"]).to eq(Instrumental::Command.new("increment", "abc", 30, time, 30))
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should aggregate put operations to the same metric and last type wins" do
|
39
|
+
Timecop.freeze do
|
40
|
+
agg = EventAggregator.new(frequency: 6)
|
41
|
+
|
42
|
+
agg.put(Instrumental::Command.new("gauge", "hello", 3.0, Time.now, 1))
|
43
|
+
agg.put(Instrumental::Command.new("increment", "hello", 4.0, Time.now, 1))
|
44
|
+
|
45
|
+
expect(agg.size).to eq(1)
|
46
|
+
expect(agg.values.values.first).to eq(Instrumental::Command.new("increment",
|
47
|
+
"hello",
|
48
|
+
7.0,
|
49
|
+
agg.coerce_time(Time.now),
|
50
|
+
2))
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: instrumental_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0.
|
4
|
+
version: 3.0.0.beta
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Expected Behavior
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-10-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: metrician
|
@@ -118,12 +118,16 @@ files:
|
|
118
118
|
- lib/instrumental/capistrano.rb
|
119
119
|
- lib/instrumental/capistrano/capistrano2.rb
|
120
120
|
- lib/instrumental/capistrano/capistrano3.rake
|
121
|
+
- lib/instrumental/command_structs.rb
|
122
|
+
- lib/instrumental/event_aggregator.rb
|
121
123
|
- lib/instrumental/system_timer.rb
|
122
124
|
- lib/instrumental/version.rb
|
123
125
|
- lib/instrumental_agent.rb
|
124
126
|
- script/setup
|
125
127
|
- script/test
|
126
128
|
- spec/agent_spec.rb
|
129
|
+
- spec/command_struct_specs.rb
|
130
|
+
- spec/event_aggregator_spec.rb
|
127
131
|
- spec/spec_helper.rb
|
128
132
|
- spec/test.crt
|
129
133
|
- spec/test.csr
|
@@ -133,7 +137,7 @@ homepage: http://github.com/instrumental/instrumental_agent-ruby
|
|
133
137
|
licenses:
|
134
138
|
- MIT
|
135
139
|
metadata: {}
|
136
|
-
post_install_message:
|
140
|
+
post_install_message:
|
137
141
|
rdoc_options: []
|
138
142
|
require_paths:
|
139
143
|
- lib
|
@@ -141,7 +145,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
141
145
|
requirements:
|
142
146
|
- - ">="
|
143
147
|
- !ruby/object:Gem::Version
|
144
|
-
version: 2.
|
148
|
+
version: 2.5.7
|
145
149
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
146
150
|
requirements:
|
147
151
|
- - ">"
|
@@ -149,11 +153,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
149
153
|
version: 1.3.1
|
150
154
|
requirements: []
|
151
155
|
rubygems_version: 3.0.3
|
152
|
-
signing_key:
|
156
|
+
signing_key:
|
153
157
|
specification_version: 4
|
154
158
|
summary: Custom metric monitoring for Ruby applications via Instrumental
|
155
159
|
test_files:
|
156
160
|
- spec/agent_spec.rb
|
161
|
+
- spec/command_struct_specs.rb
|
162
|
+
- spec/event_aggregator_spec.rb
|
157
163
|
- spec/spec_helper.rb
|
158
164
|
- spec/test.crt
|
159
165
|
- spec/test.csr
|