instrumental_agent 3.0.0.alpha → 3.0.0.beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -2
- data/CHANGELOG.md +6 -0
- data/README.md +22 -0
- data/instrumental_agent.gemspec +1 -1
- data/lib/instrumental/agent.rb +191 -65
- data/lib/instrumental/command_structs.rb +32 -0
- data/lib/instrumental/event_aggregator.rb +26 -0
- data/lib/instrumental/version.rb +1 -1
- data/spec/agent_spec.rb +332 -35
- data/spec/command_struct_specs.rb +20 -0
- data/spec/event_aggregator_spec.rb +53 -0
- metadata +12 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a1a781a0abec98e8c737a57fac5134fe2ee847879949fc5134ced2c0b0136076
|
4
|
+
data.tar.gz: 89950c6e0b59713dc6034be99acfc52473ef049b63824685694626d049a85b8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca82be6040189400c0e01513d517202e8d27d900ad2f10e5558025cad6a499e49985ee58c95efdf90d24e2fd0174b9349c7cd6ac8f906325f2de766c0f14cba3
|
7
|
+
data.tar.gz: 742200da53f676a9341a795f5f8145416650c033466782348fc4852dc62e656e9fed4e2be02c0829c2ec946092e7dfcd7d836cc50e731a6ceb8cb2493b76a1b6
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
### 3.0.0.beta [October 8, 2020]
|
2
|
+
* Drop support for outdated versions of Ruby
|
3
|
+
* Explicitly add support for new versions of Ruby
|
4
|
+
* Add support for client-side aggregation
|
5
|
+
* Note: the agent API has NOT changed. This is a major release because of the significant changes in Ruby versions officially supported.
|
6
|
+
|
1
7
|
### 3.0.0.alpha [August 22, 2019]
|
2
8
|
* Drop support for outdated versions of Ruby
|
3
9
|
* Explicitly add support for new versions of Ruby
|
data/README.md
CHANGED
@@ -59,6 +59,17 @@ User.find_each do |user|
|
|
59
59
|
end
|
60
60
|
```
|
61
61
|
|
62
|
+
## Aggregation
|
63
|
+
Aggregation collects more data on your system before sending it to Instrumental. This reduces the total amount of data being sent, at the cost of a small amount of additional latency. You can control this feature with the frequency parameter:
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
I = Instrumental::Agent.new('PROJECT_API_TOKEN', :frequency => 15) # send data every 15 seconds
|
67
|
+
I.frequency = 6 # send batches of data every 6 seconds
|
68
|
+
```
|
69
|
+
|
70
|
+
The agent may send data more frequently if you are sending a large number of different metrics. Values between 3 and 15 are generally reasonable. If you want to disable this behavior and send every metric as fast as possible, set frequency to zero or nil. Note that a frequency of zero will still use a separate thread for performance - it is NOT the same as synchronous mode.
|
71
|
+
|
72
|
+
|
62
73
|
## Server Metrics
|
63
74
|
|
64
75
|
Want server stats like load, memory, etc.? Check out [InstrumentalD](https://github.com/instrumental/instrumentald).
|
@@ -109,6 +120,17 @@ I = Instrumental::Agent.new('PROJECT_API_TOKEN',
|
|
109
120
|
)
|
110
121
|
```
|
111
122
|
|
123
|
+
### Upgrading from 2.x
|
124
|
+
|
125
|
+
Agent version 3.x drops support for some older Ruby versions, but should otherwise be a drop-in replacement. If you wish to enable Aggregation, create the agent with the frequency option set to the number of seconds you would like to wait between flushes. For example:
|
126
|
+
|
127
|
+
```
|
128
|
+
I = Instrumental::Agent.new('PROJECT_API_TOKEN',
|
129
|
+
:enabled => Rails.env.production?,
|
130
|
+
:frequency => 15
|
131
|
+
)
|
132
|
+
```
|
133
|
+
|
112
134
|
## Troubleshooting & Help
|
113
135
|
|
114
136
|
We are here to help. Email us at [support@instrumentalapp.com](mailto:support@instrumentalapp.com).
|
data/instrumental_agent.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.summary = %q{Custom metric monitoring for Ruby applications via Instrumental}
|
11
11
|
s.description = %q{This agent supports Instrumental custom metric monitoring for Ruby applications. It provides high-data reliability at high scale, without ever blocking your process or causing an exception.}
|
12
12
|
s.license = "MIT"
|
13
|
-
s.required_ruby_version = '>= 2.
|
13
|
+
s.required_ruby_version = '>= 2.5.7'
|
14
14
|
|
15
15
|
s.files = `git ls-files`.split("\n")
|
16
16
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
data/lib/instrumental/agent.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
require 'instrumental/version'
|
2
2
|
require 'instrumental/system_timer'
|
3
|
+
require 'instrumental/command_structs'
|
4
|
+
require 'instrumental/event_aggregator'
|
3
5
|
require 'logger'
|
4
6
|
require 'openssl' rescue nil
|
5
7
|
require 'resolv'
|
@@ -15,14 +17,17 @@ module Instrumental
|
|
15
17
|
EXIT_FLUSH_TIMEOUT = 5
|
16
18
|
HOSTNAME = Socket.gethostbyname(Socket.gethostname).first rescue Socket.gethostname
|
17
19
|
MAX_BUFFER = 5000
|
20
|
+
MAX_AGGREGATOR_SIZE = 5000
|
18
21
|
MAX_RECONNECT_DELAY = 15
|
19
22
|
REPLY_TIMEOUT = 10
|
20
23
|
RESOLUTION_FAILURES_BEFORE_WAITING = 3
|
21
24
|
RESOLUTION_WAIT = 30
|
22
25
|
RESOLVE_TIMEOUT = 1
|
26
|
+
DEFAULT_FREQUENCY = 0
|
27
|
+
VALID_FREQUENCIES = [0, 1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30, 60]
|
23
28
|
|
24
29
|
|
25
|
-
attr_accessor :host, :port, :synchronous, :
|
30
|
+
attr_accessor :host, :port, :synchronous, :frequency, :sender_queue, :aggregator_queue, :dns_resolutions, :last_connect_at
|
26
31
|
attr_reader :connection, :enabled, :secure
|
27
32
|
|
28
33
|
def self.logger=(l)
|
@@ -52,6 +57,7 @@ module Instrumental
|
|
52
57
|
# port: 8001
|
53
58
|
# enabled: true
|
54
59
|
# synchronous: false
|
60
|
+
# frequency: 10
|
55
61
|
# secure: true
|
56
62
|
# verify: true
|
57
63
|
@api_key = api_key
|
@@ -73,13 +79,23 @@ module Instrumental
|
|
73
79
|
@port = (@port || default_port).to_i
|
74
80
|
@enabled = options.has_key?(:enabled) ? !!options[:enabled] : true
|
75
81
|
@synchronous = !!options[:synchronous]
|
82
|
+
|
83
|
+
if options.has_key?(:frequency)
|
84
|
+
self.frequency = options[:frequency]
|
85
|
+
else
|
86
|
+
self.frequency = DEFAULT_FREQUENCY
|
87
|
+
end
|
88
|
+
|
89
|
+
@metrician = options[:metrician].nil? ? true : !!options[:metrician]
|
76
90
|
@pid = Process.pid
|
77
91
|
@allow_reconnect = true
|
78
92
|
@dns_resolutions = 0
|
79
93
|
@last_connect_at = 0
|
80
|
-
|
94
|
+
|
81
95
|
@start_worker_mutex = Mutex.new
|
82
|
-
@
|
96
|
+
@aggregator_queue = Queue.new
|
97
|
+
@sender_queue = Queue.new
|
98
|
+
|
83
99
|
|
84
100
|
setup_cleanup_at_exit if @enabled
|
85
101
|
|
@@ -93,7 +109,9 @@ module Instrumental
|
|
93
109
|
# agent.gauge('load', 1.23)
|
94
110
|
def gauge(metric, value, time = Time.now, count = 1)
|
95
111
|
if valid?(metric, value, time, count) &&
|
96
|
-
|
112
|
+
send_command(Instrumental::Command.new("gauge".freeze, metric, value, time, count))
|
113
|
+
# tempted to "gauge" this to a symbol? Don't. Frozen strings are very fast,
|
114
|
+
# and later we're going to to_s every one of these anyway.
|
97
115
|
value
|
98
116
|
else
|
99
117
|
nil
|
@@ -141,7 +159,7 @@ module Instrumental
|
|
141
159
|
# agent.increment('users')
|
142
160
|
def increment(metric, value = 1, time = Time.now, count = 1)
|
143
161
|
if valid?(metric, value, time, count) &&
|
144
|
-
|
162
|
+
send_command(Instrumental::Command.new("increment".freeze, metric, value, time, count))
|
145
163
|
value
|
146
164
|
else
|
147
165
|
nil
|
@@ -156,7 +174,7 @@ module Instrumental
|
|
156
174
|
# agent.notice('A notice')
|
157
175
|
def notice(note, time = Time.now, duration = 0)
|
158
176
|
if valid_note?(note)
|
159
|
-
send_command(
|
177
|
+
send_command(Instrumental::Notice.new(note, time, duration))
|
160
178
|
note
|
161
179
|
else
|
162
180
|
nil
|
@@ -195,6 +213,22 @@ module Instrumental
|
|
195
213
|
@logger || self.class.logger
|
196
214
|
end
|
197
215
|
|
216
|
+
def frequency=(frequency)
|
217
|
+
freq = frequency.to_i
|
218
|
+
if !VALID_FREQUENCIES.include?(freq)
|
219
|
+
logger.warn "Frequency must be a value that divides evenly into 60: 1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30, or 60."
|
220
|
+
# this will make all negative numbers and nils into 0s
|
221
|
+
freq = VALID_FREQUENCIES.select{ |f| f < freq }.max.to_i
|
222
|
+
end
|
223
|
+
|
224
|
+
@frequency = if(@synchronous)
|
225
|
+
logger.warn "Synchronous and Frequency should not be enabled at the same time! Defaulting to synchronous mode."
|
226
|
+
0
|
227
|
+
else
|
228
|
+
freq
|
229
|
+
end
|
230
|
+
end
|
231
|
+
|
198
232
|
# Stopping the agent will immediately stop all communication
|
199
233
|
# to Instrumental. If you call this and submit another metric,
|
200
234
|
# the agent will start again.
|
@@ -206,12 +240,19 @@ module Instrumental
|
|
206
240
|
#
|
207
241
|
def stop
|
208
242
|
disconnect
|
209
|
-
if @
|
210
|
-
@
|
211
|
-
@
|
243
|
+
if @sender_thread
|
244
|
+
@sender_thread.kill
|
245
|
+
@sender_thread = nil
|
246
|
+
end
|
247
|
+
if @aggregator_thread
|
248
|
+
@aggregator_thread.kill
|
249
|
+
@aggregator_thread = nil
|
212
250
|
end
|
213
|
-
if @
|
214
|
-
@
|
251
|
+
if @sender_queue
|
252
|
+
@sender_queue.clear
|
253
|
+
end
|
254
|
+
if @aggregator_queue
|
255
|
+
@aggregator_queue.clear
|
215
256
|
end
|
216
257
|
end
|
217
258
|
|
@@ -221,15 +262,22 @@ module Instrumental
|
|
221
262
|
# where at_exit is bypassed like Resque workers.
|
222
263
|
def cleanup
|
223
264
|
if running?
|
224
|
-
logger.info "Cleaning up agent,
|
265
|
+
logger.info "Cleaning up agent, aggregator_size: #{@aggregator_queue.size}, thread_running: #{@aggregator_thread.alive?}"
|
266
|
+
logger.info "Cleaning up agent, queue size: #{@sender_queue.size}, thread running: #{@sender_thread.alive?}"
|
225
267
|
@allow_reconnect = false
|
226
|
-
if @
|
227
|
-
|
268
|
+
if @sender_queue.size > 0 || @aggregator_queue.size > 0
|
269
|
+
@sender_queue << ['exit']
|
270
|
+
@aggregator_queue << ['exit']
|
228
271
|
begin
|
229
|
-
with_timeout(EXIT_FLUSH_TIMEOUT) { @
|
272
|
+
with_timeout(EXIT_FLUSH_TIMEOUT) { @aggregator_thread.join }
|
273
|
+
with_timeout(EXIT_FLUSH_TIMEOUT) { @sender_thread.join }
|
230
274
|
rescue Timeout::Error
|
231
|
-
|
232
|
-
|
275
|
+
total_size = @sender_queue&.size.to_i +
|
276
|
+
@aggregator_queue&.size.to_i +
|
277
|
+
@event_aggregator&.size.to_i
|
278
|
+
|
279
|
+
if total_size > 0
|
280
|
+
logger.error "Timed out working agent thread on exit, dropping #{total_size} metrics"
|
233
281
|
else
|
234
282
|
logger.error "Timed out Instrumental Agent, exiting"
|
235
283
|
end
|
@@ -270,6 +318,7 @@ module Instrumental
|
|
270
318
|
end
|
271
319
|
|
272
320
|
def report_exception(e)
|
321
|
+
# puts "--- Exception of type #{e.class} occurred:\n#{e.message}\n#{e.backtrace.join("\n")}"
|
273
322
|
logger.error "Exception of type #{e.class} occurred:\n#{e.message}\n#{e.backtrace.join("\n")}"
|
274
323
|
end
|
275
324
|
|
@@ -290,44 +339,41 @@ module Instrumental
|
|
290
339
|
nil
|
291
340
|
end
|
292
341
|
|
293
|
-
def send_command(
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
queue_message(cmd, { :synchronous => @synchronous })
|
302
|
-
else
|
303
|
-
if !@queue_full_warning
|
304
|
-
@queue_full_warning = true
|
305
|
-
logger.warn "Queue full(#{@queue.size}), dropping commands..."
|
306
|
-
end
|
307
|
-
logger.debug "Dropping command, queue full(#{@queue.size}): #{cmd.chomp}"
|
308
|
-
nil
|
309
|
-
end
|
342
|
+
def send_command(command)
|
343
|
+
return logger.debug(command.to_s) unless enabled?
|
344
|
+
start_workers
|
345
|
+
critical_queue = frequency.to_i == 0 ? @sender_queue : @aggregator_queue
|
346
|
+
if critical_queue && critical_queue.size < MAX_BUFFER
|
347
|
+
@queue_full_warning = false
|
348
|
+
logger.debug "Queueing: #{command.to_s}"
|
349
|
+
queue_message(command, { :synchronous => @synchronous })
|
310
350
|
else
|
311
|
-
|
351
|
+
if !@queue_full_warning
|
352
|
+
@queue_full_warning = true
|
353
|
+
logger.warn "Queue full(#{critical_queue.size}), dropping commands..."
|
354
|
+
end
|
355
|
+
logger.debug "Dropping command, queue full(#{critical_queue.size}): #{command.to_s}"
|
356
|
+
nil
|
312
357
|
end
|
313
358
|
end
|
314
359
|
|
315
360
|
def queue_message(message, options = {})
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
@
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
361
|
+
return message unless enabled?
|
362
|
+
|
363
|
+
# imagine it's a reverse merge, but with fewer allocations
|
364
|
+
options[:allow_reconnect] = @allow_reconnect unless options.has_key?(:allow_reconnect)
|
365
|
+
|
366
|
+
if options.delete(:synchronous)
|
367
|
+
options[:sync_resource] ||= ConditionVariable.new
|
368
|
+
@sync_mutex.synchronize {
|
369
|
+
queue = message == "flush" ? @aggregator_queue : @sender_queue
|
370
|
+
queue << [message, options]
|
371
|
+
options[:sync_resource].wait(@sync_mutex)
|
372
|
+
}
|
373
|
+
elsif frequency.to_i == 0
|
374
|
+
@sender_queue << [message, options]
|
375
|
+
else
|
376
|
+
@aggregator_queue << [message, options]
|
331
377
|
end
|
332
378
|
message
|
333
379
|
end
|
@@ -355,9 +401,9 @@ module Instrumental
|
|
355
401
|
end
|
356
402
|
end
|
357
403
|
|
358
|
-
def
|
404
|
+
def start_workers
|
359
405
|
# NOTE: We need a mutex around both `running?` and thread creation,
|
360
|
-
# otherwise we could create
|
406
|
+
# otherwise we could create too many threads.
|
361
407
|
# Return early and queue the message if another thread is
|
362
408
|
# starting the worker.
|
363
409
|
return if !@start_worker_mutex.try_lock
|
@@ -371,9 +417,19 @@ module Instrumental
|
|
371
417
|
@sync_mutex = Mutex.new
|
372
418
|
@failures = 0
|
373
419
|
@sockaddr_in = Socket.pack_sockaddr_in(@port, address)
|
374
|
-
|
375
|
-
|
376
|
-
|
420
|
+
|
421
|
+
logger.info "Starting aggregator thread"
|
422
|
+
if !@aggregator_thread&.alive?
|
423
|
+
@aggregator_thread = Thread.new do
|
424
|
+
run_aggregator_loop
|
425
|
+
end
|
426
|
+
end
|
427
|
+
|
428
|
+
if !@sender_thread&.alive?
|
429
|
+
logger.info "Starting sender thread"
|
430
|
+
@sender_thread = Thread.new do
|
431
|
+
run_sender_loop
|
432
|
+
end
|
377
433
|
end
|
378
434
|
end
|
379
435
|
ensure
|
@@ -409,12 +465,73 @@ module Instrumental
|
|
409
465
|
sock
|
410
466
|
end
|
411
467
|
|
412
|
-
def
|
468
|
+
def run_aggregator_loop
|
469
|
+
# if the sender queue is some level of full, should we keep aggregating until it empties out?
|
470
|
+
# what does this mean for aggregation slices - aggregating to nearest frequency will
|
471
|
+
# make the object needlessly larger, when minute resolution is what we have on the server
|
472
|
+
begin
|
473
|
+
loop do
|
474
|
+
now = Time.now.to_i
|
475
|
+
time_to_wait = if frequency == 0
|
476
|
+
0
|
477
|
+
else
|
478
|
+
next_frequency = (now - (now % frequency)) + frequency
|
479
|
+
time_to_wait = [(next_frequency - Time.now.to_f), 0].max
|
480
|
+
end
|
481
|
+
|
482
|
+
command_and_args, command_options = if @event_aggregator&.size.to_i > MAX_AGGREGATOR_SIZE
|
483
|
+
logger.info "Aggregator full, flushing early with #{MAX_AGGREGATOR_SIZE} metrics."
|
484
|
+
command_and_args, command_options = ['forward', {}]
|
485
|
+
else
|
486
|
+
begin
|
487
|
+
with_timeout(time_to_wait) do
|
488
|
+
@aggregator_queue.pop
|
489
|
+
end
|
490
|
+
rescue Timeout::Error
|
491
|
+
['forward', {}]
|
492
|
+
end
|
493
|
+
end
|
494
|
+
if command_and_args
|
495
|
+
sync_resource = command_options && command_options[:sync_resource]
|
496
|
+
case command_and_args
|
497
|
+
when 'exit'
|
498
|
+
logger.info "Exiting, #{@aggregator_queue.size} commands remain"
|
499
|
+
return true
|
500
|
+
when 'flush'
|
501
|
+
if !@event_aggregator.nil?
|
502
|
+
@sender_queue << @event_aggregator
|
503
|
+
@event_aggregator = nil
|
504
|
+
end
|
505
|
+
@sender_queue << ['flush', command_options]
|
506
|
+
when 'forward'
|
507
|
+
if !@event_aggregator.nil?
|
508
|
+
next if @sender_queue.size > 0 && @sender_queue.num_waiting < 1
|
509
|
+
@sender_queue << @event_aggregator
|
510
|
+
@event_aggregator = nil
|
511
|
+
end
|
512
|
+
when Notice
|
513
|
+
@sender_queue << [command_and_args, command_options]
|
514
|
+
else
|
515
|
+
@event_aggregator = EventAggregator.new(frequency: @frequency) if @event_aggregator.nil?
|
516
|
+
|
517
|
+
logger.debug "Sending: #{command_and_args} to aggregator"
|
518
|
+
@event_aggregator.put(command_and_args)
|
519
|
+
end
|
520
|
+
command_and_args = nil
|
521
|
+
command_options = nil
|
522
|
+
end
|
523
|
+
end
|
524
|
+
rescue Exception => err
|
525
|
+
report_exception(err)
|
526
|
+
end
|
527
|
+
end
|
528
|
+
|
529
|
+
def run_sender_loop
|
413
530
|
@failures = 0
|
414
531
|
begin
|
415
|
-
|
416
|
-
|
417
|
-
|
532
|
+
logger.info "connecting to collector"
|
533
|
+
command_and_args = nil
|
534
|
+
command_options = nil
|
418
535
|
with_timeout(CONNECT_TIMEOUT) do
|
419
536
|
@socket = open_socket(@sockaddr_in, @secure, @verify_cert)
|
420
537
|
end
|
@@ -431,18 +548,23 @@ module Instrumental
|
|
431
548
|
send_with_reply_timeout "authenticate #{@api_key}"
|
432
549
|
|
433
550
|
loop do
|
434
|
-
command_and_args, command_options = @
|
551
|
+
command_and_args, command_options = @sender_queue.pop
|
435
552
|
if command_and_args
|
436
553
|
sync_resource = command_options && command_options[:sync_resource]
|
437
554
|
test_connection
|
438
555
|
case command_and_args
|
439
556
|
when 'exit'
|
440
|
-
logger.info "Exiting, #{@
|
557
|
+
logger.info "Exiting, #{@sender_queue.size} commands remain"
|
441
558
|
return true
|
442
559
|
when 'flush'
|
443
560
|
release_resource = true
|
561
|
+
when EventAggregator
|
562
|
+
command_and_args.values.values.each do |command|
|
563
|
+
logger.debug "Sending: #{command}"
|
564
|
+
@socket.puts command
|
565
|
+
end
|
444
566
|
else
|
445
|
-
logger.debug "Sending: #{command_and_args
|
567
|
+
logger.debug "Sending: #{command_and_args}"
|
446
568
|
@socket.puts command_and_args
|
447
569
|
end
|
448
570
|
command_and_args = nil
|
@@ -464,7 +586,7 @@ module Instrumental
|
|
464
586
|
# or we cannot reach the server
|
465
587
|
# or the connection state of this socket is in a race
|
466
588
|
# or SSL is not functioning properly for some reason
|
467
|
-
logger.error "unable to connect to Instrumental, hanging up with #{@
|
589
|
+
logger.error "unable to connect to Instrumental, hanging up with #{@sender_queue.size} messages remaining"
|
468
590
|
logger.debug "Exception: #{err.inspect}\n#{err.backtrace.join("\n")}"
|
469
591
|
allow_reconnect = false
|
470
592
|
else
|
@@ -478,7 +600,7 @@ module Instrumental
|
|
478
600
|
end
|
479
601
|
if command_and_args
|
480
602
|
logger.debug "requeueing: #{command_and_args}"
|
481
|
-
@
|
603
|
+
@sender_queue << command_and_args
|
482
604
|
end
|
483
605
|
disconnect
|
484
606
|
@failures += 1
|
@@ -498,7 +620,11 @@ module Instrumental
|
|
498
620
|
end
|
499
621
|
|
500
622
|
def running?
|
501
|
-
!@
|
623
|
+
!@sender_thread.nil? &&
|
624
|
+
!@aggregator_thread.nil? &&
|
625
|
+
@pid == Process.pid &&
|
626
|
+
@sender_thread.alive? &&
|
627
|
+
@aggregator_thread.alive?
|
502
628
|
end
|
503
629
|
|
504
630
|
def flush_socket(socket)
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Instrumental
|
2
|
+
METRIC_TYPES = ["increment".freeze, "gauge".freeze].freeze
|
3
|
+
|
4
|
+
Command = Struct.new(:command, :metric, :value, :time, :count) do
|
5
|
+
def initialize(command, metric, value, time, count)
|
6
|
+
super(command, metric, value, time.to_i, count.to_i)
|
7
|
+
end
|
8
|
+
|
9
|
+
def to_s
|
10
|
+
[command, metric, value, time, count].map(&:to_s).join(" ")
|
11
|
+
end
|
12
|
+
|
13
|
+
def metadata
|
14
|
+
"#{metric}:#{time}".freeze
|
15
|
+
end
|
16
|
+
|
17
|
+
def +(other_command)
|
18
|
+
return self if other_command.nil?
|
19
|
+
Command.new(command, metric, value + other_command.value, time, count + other_command.count)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
Notice = Struct.new(:note, :time, :duration) do
|
24
|
+
def initialize(note, time, duration)
|
25
|
+
super(note, time.to_i, duration.to_i)
|
26
|
+
end
|
27
|
+
|
28
|
+
def to_s
|
29
|
+
["notice".freeze, time, duration, note].map(&:to_s).join(" ")
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
class EventAggregator
|
2
|
+
attr_accessor :counts, :values, :received_at, :frequency
|
3
|
+
|
4
|
+
def initialize(frequency:)
|
5
|
+
@values = Hash.new
|
6
|
+
@frequency = frequency
|
7
|
+
end
|
8
|
+
|
9
|
+
def put(command)
|
10
|
+
command_at = command.time
|
11
|
+
unless(command_at % frequency == 0)
|
12
|
+
command.time = (command_at - (command_at % frequency))
|
13
|
+
end
|
14
|
+
metadata = command.metadata
|
15
|
+
@values[metadata] = (command + @values[metadata])
|
16
|
+
end
|
17
|
+
|
18
|
+
def size
|
19
|
+
@values.size
|
20
|
+
end
|
21
|
+
|
22
|
+
def coerce_time(time)
|
23
|
+
itime = time.to_i
|
24
|
+
(itime - (itime % frequency)).to_i
|
25
|
+
end
|
26
|
+
end
|
data/lib/instrumental/version.rb
CHANGED
data/spec/agent_spec.rb
CHANGED
@@ -39,7 +39,8 @@ shared_examples "Instrumental Agent" do
|
|
39
39
|
let(:token) { 'test_token' }
|
40
40
|
let(:address) { server.host_and_port }
|
41
41
|
let(:metrician) { false }
|
42
|
-
let(:
|
42
|
+
let(:frequency) { 0 }
|
43
|
+
let(:agent) { Instrumental::Agent.new(token, :collector => address, :synchronous => synchronous, :enabled => enabled, :secure => secure?, :verify_cert => verify_cert?, :metrician => metrician, :frequency => frequency) }
|
43
44
|
|
44
45
|
# Server options
|
45
46
|
let(:listen) { true }
|
@@ -47,6 +48,12 @@ shared_examples "Instrumental Agent" do
|
|
47
48
|
let(:authenticate) { true }
|
48
49
|
let(:server) { TestServer.new(:listen => listen, :authenticate => authenticate, :response => response, :secure => secure?) }
|
49
50
|
|
51
|
+
# Time Travel Options
|
52
|
+
let(:start_of_minute) do
|
53
|
+
now = Time.now.to_i
|
54
|
+
Time.at(now - (now % 60))
|
55
|
+
end
|
56
|
+
|
50
57
|
before do
|
51
58
|
Instrumental::Agent.logger.level = Logger::UNKNOWN
|
52
59
|
@server = server
|
@@ -233,11 +240,11 @@ shared_examples "Instrumental Agent" do
|
|
233
240
|
end
|
234
241
|
|
235
242
|
wait
|
236
|
-
expect(agent.
|
237
|
-
expect(agent.
|
238
|
-
expect(agent.
|
239
|
-
expect(agent.
|
240
|
-
expect(agent.
|
243
|
+
expect(agent.sender_queue.size).to eq(3)
|
244
|
+
expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 1 300 1")
|
245
|
+
expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 2 300 1")
|
246
|
+
expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 3 300 1")
|
247
|
+
expect(agent.sender_queue.size).to eq(0)
|
241
248
|
end
|
242
249
|
end
|
243
250
|
end
|
@@ -248,7 +255,7 @@ shared_examples "Instrumental Agent" do
|
|
248
255
|
5.times do |i|
|
249
256
|
agent.increment('overflow_test', i + 1, 300)
|
250
257
|
end
|
251
|
-
expect(agent.instance_variable_get(:@
|
258
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
252
259
|
wait # let the server receive the commands
|
253
260
|
expect(server.commands).to include("increment overflow_test 1 300 1")
|
254
261
|
expect(server.commands).to include("increment overflow_test 2 300 1")
|
@@ -264,8 +271,10 @@ shared_examples "Instrumental Agent" do
|
|
264
271
|
fork do
|
265
272
|
agent.increment('fork_reconnect_test', 1, 3) # triggers reconnect
|
266
273
|
end
|
274
|
+
|
267
275
|
wait(1)
|
268
276
|
agent.increment('fork_reconnect_test', 1, 4) # triggers reconnect
|
277
|
+
|
269
278
|
wait(1)
|
270
279
|
expect(server.connect_count).to eq(2)
|
271
280
|
|
@@ -281,17 +290,17 @@ shared_examples "Instrumental Agent" do
|
|
281
290
|
sleep 1
|
282
291
|
}
|
283
292
|
|
284
|
-
|
285
|
-
allow(agent).to receive(:
|
286
|
-
|
293
|
+
run_sender_loop_calls = 0
|
294
|
+
allow(agent).to receive(:run_sender_loop) {
|
295
|
+
run_sender_loop_calls += 1
|
287
296
|
sleep 3 # keep the worker thread alive
|
288
297
|
}
|
289
298
|
|
290
299
|
t = Thread.new { agent.increment("race") }
|
291
300
|
agent.increment("race")
|
292
301
|
wait(2)
|
293
|
-
expect(
|
294
|
-
expect(agent.
|
302
|
+
expect(run_sender_loop_calls).to eq(1)
|
303
|
+
expect(agent.sender_queue.size).to eq(2)
|
295
304
|
end
|
296
305
|
|
297
306
|
it "should never let an exception reach the user" do
|
@@ -314,14 +323,6 @@ shared_examples "Instrumental Agent" do
|
|
314
323
|
expect(agent.increment("test")).to eq(nil)
|
315
324
|
end
|
316
325
|
|
317
|
-
it "should track invalid metrics" do
|
318
|
-
expect(agent.logger).to receive(:warn).with(/%%/)
|
319
|
-
agent.increment(' %% .!#@$%^&*', 1, 1)
|
320
|
-
wait do
|
321
|
-
expect(server.commands.join("\n")).to include("increment agent.invalid_metric")
|
322
|
-
end
|
323
|
-
end
|
324
|
-
|
325
326
|
it "should allow reasonable metric names" do
|
326
327
|
agent.increment('a')
|
327
328
|
agent.increment('a.b')
|
@@ -399,9 +400,9 @@ shared_examples "Instrumental Agent" do
|
|
399
400
|
|
400
401
|
it "should allow flushing pending values to the server" do
|
401
402
|
1.upto(100) { agent.gauge('a', rand(50)) }
|
402
|
-
expect(agent.instance_variable_get(:@
|
403
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to be > 0
|
403
404
|
agent.flush
|
404
|
-
expect(agent.instance_variable_get(:@
|
405
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
405
406
|
wait do
|
406
407
|
expect(server.commands.grep(/^gauge a /).size).to eq(100)
|
407
408
|
end
|
@@ -439,7 +440,7 @@ shared_examples "Instrumental Agent" do
|
|
439
440
|
agent.increment('reconnect_test', 1, 1234)
|
440
441
|
wait
|
441
442
|
# The agent should not have sent the metric yet, the server is not responding
|
442
|
-
expect(agent.
|
443
|
+
expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
|
443
444
|
end
|
444
445
|
|
445
446
|
it "should warn once when buffer is full" do
|
@@ -474,7 +475,7 @@ shared_examples "Instrumental Agent" do
|
|
474
475
|
agent.increment('reconnect_test', 1, 1234)
|
475
476
|
wait
|
476
477
|
# Since server hasn't responded to hello or authenticate, worker thread will not send data
|
477
|
-
expect(agent.
|
478
|
+
expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
|
478
479
|
end
|
479
480
|
end
|
480
481
|
|
@@ -495,7 +496,7 @@ shared_examples "Instrumental Agent" do
|
|
495
496
|
wait do
|
496
497
|
expect(agent.send(:running?)).to eq(false)
|
497
498
|
end
|
498
|
-
expect(agent.
|
499
|
+
expect(agent.sender_queue.size).to eq(1)
|
499
500
|
end
|
500
501
|
|
501
502
|
it "should restart the worker thread after hanging it up during an unreachable host event" do
|
@@ -514,7 +515,7 @@ shared_examples "Instrumental Agent" do
|
|
514
515
|
wait do
|
515
516
|
expect(agent.send(:running?)).to eq(false)
|
516
517
|
end
|
517
|
-
expect(agent.
|
518
|
+
expect(agent.sender_queue.size).to eq(1)
|
518
519
|
# Start the server back up again
|
519
520
|
server.listen
|
520
521
|
# Sending another metric should kickstart the background worker thread
|
@@ -522,7 +523,7 @@ shared_examples "Instrumental Agent" do
|
|
522
523
|
# The agent should now be running the background thread, and the queue should be empty
|
523
524
|
wait do
|
524
525
|
expect(agent.send(:running?)).to eq(true)
|
525
|
-
expect(agent.
|
526
|
+
expect(agent.sender_queue.size).to eq(0)
|
526
527
|
end
|
527
528
|
end
|
528
529
|
|
@@ -547,7 +548,7 @@ shared_examples "Instrumental Agent" do
|
|
547
548
|
expect(agent.send(:running?)).to eq(false)
|
548
549
|
end
|
549
550
|
# The command is not in the queue
|
550
|
-
expect(agent.
|
551
|
+
expect(agent.sender_queue.size).to eq(0)
|
551
552
|
# allow the agent to behave normally
|
552
553
|
test_connection_fail = false
|
553
554
|
# Sending another metric should kickstart the background worker thread
|
@@ -555,7 +556,7 @@ shared_examples "Instrumental Agent" do
|
|
555
556
|
# The agent should now be running the background thread, and the queue should be empty
|
556
557
|
wait do
|
557
558
|
expect(agent.send(:running?)).to eq(true)
|
558
|
-
expect(agent.
|
559
|
+
expect(agent.sender_queue.size).to eq(0)
|
559
560
|
expect(server.commands.grep(/connection_failure/).size).to eq(2)
|
560
561
|
end
|
561
562
|
end
|
@@ -580,7 +581,7 @@ shared_examples "Instrumental Agent" do
|
|
580
581
|
agent.gauge('connection_failure_3', 1, 1234)
|
581
582
|
wait do
|
582
583
|
expect(agent.instance_variable_get(:@failures)).to be > 0
|
583
|
-
expect(agent.
|
584
|
+
expect(agent.sender_queue.size).to be > 0
|
584
585
|
end
|
585
586
|
|
586
587
|
# let the loop proceed
|
@@ -588,12 +589,11 @@ shared_examples "Instrumental Agent" do
|
|
588
589
|
|
589
590
|
wait do
|
590
591
|
expect(agent.send(:running?)).to eq(true)
|
591
|
-
expect(agent.
|
592
|
+
expect(agent.sender_queue.size).to eq(0)
|
592
593
|
end
|
593
594
|
end
|
594
595
|
end
|
595
596
|
|
596
|
-
|
597
597
|
context 'not authenticating' do
|
598
598
|
# Server will fail all authentication attempts
|
599
599
|
let(:authenticate) { false }
|
@@ -602,7 +602,7 @@ shared_examples "Instrumental Agent" do
|
|
602
602
|
agent.increment('reconnect_test', 1, 1234)
|
603
603
|
wait
|
604
604
|
# Metrics should not have been sent since all authentication failed
|
605
|
-
expect(agent.
|
605
|
+
expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
|
606
606
|
end
|
607
607
|
end
|
608
608
|
|
@@ -639,7 +639,7 @@ shared_examples "Instrumental Agent" do
|
|
639
639
|
it "should not wait to exit a process if there are no commands queued" do
|
640
640
|
allow(agent).to receive(:open_socket) { |*args, &block| sleep(5) && block.call }
|
641
641
|
with_constants('Instrumental::Agent::EXIT_FLUSH_TIMEOUT' => 3) do
|
642
|
-
if (pid = fork { agent.increment('foo', 1); agent.
|
642
|
+
if (pid = fork { agent.increment('foo', 1); agent.sender_queue.clear })
|
643
643
|
tm = Time.now.to_f
|
644
644
|
Process.wait(pid)
|
645
645
|
diff = Time.now.to_f - tm
|
@@ -725,7 +725,7 @@ shared_examples "Instrumental Agent" do
|
|
725
725
|
expect(agent.send(:running?)).to eq(true)
|
726
726
|
|
727
727
|
# Setup a failure for the next command so we'll break out of the inner
|
728
|
-
# loop in
|
728
|
+
# loop in run_sender_loop causing another call to open_socket
|
729
729
|
test_connection_fail = true
|
730
730
|
tc = agent.method(:test_connection)
|
731
731
|
allow(agent).to receive(:test_connection) { |*args, &block| test_connection_fail ? raise("fail") : tc.call(*args) }
|
@@ -802,6 +802,303 @@ shared_examples "Instrumental Agent" do
|
|
802
802
|
end
|
803
803
|
end
|
804
804
|
end
|
805
|
+
|
806
|
+
describe Instrumental::Agent, "aggregation" do
|
807
|
+
context "aggregation enabled" do
|
808
|
+
let(:frequency) { 2 }
|
809
|
+
|
810
|
+
it "can be enabled at Agent.new time" do
|
811
|
+
expect(agent.frequency).to eq(2)
|
812
|
+
end
|
813
|
+
|
814
|
+
it "can be modified by setting the agent frequency" do
|
815
|
+
agent.frequency = 15
|
816
|
+
expect(agent.frequency).to eq(15)
|
817
|
+
end
|
818
|
+
|
819
|
+
it "is disabled by default" do
|
820
|
+
agent = Instrumental::Agent.new('test_token')
|
821
|
+
expect(agent.frequency.to_f).to eq(0)
|
822
|
+
end
|
823
|
+
|
824
|
+
it "should only allow frequencies that align with minutes" do
|
825
|
+
(-5..100).each do |freq|
|
826
|
+
agent.frequency = freq
|
827
|
+
expect(Instrumental::Agent::VALID_FREQUENCIES).to include(agent.frequency)
|
828
|
+
end
|
829
|
+
end
|
830
|
+
|
831
|
+
it "bypasses aggregator queue entirely for most commands when frequency == 0" do
|
832
|
+
agent.frequency = 0 # this is red - 0 for green
|
833
|
+
expect(EventAggregator).not_to receive(:new)
|
834
|
+
agent.increment('a_metric')
|
835
|
+
end
|
836
|
+
|
837
|
+
it "adds data to the event aggregator and does not immediately send it" do
|
838
|
+
Timecop.travel start_of_minute
|
839
|
+
agent.increment('test')
|
840
|
+
wait do
|
841
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(1)
|
842
|
+
expect(agent.instance_variable_get(:@event_aggregator).values.values.first.metric).to eq('test')
|
843
|
+
end
|
844
|
+
end
|
845
|
+
|
846
|
+
it "batches data before sending" do
|
847
|
+
Timecop.freeze do
|
848
|
+
agent.increment('a_metric')
|
849
|
+
agent.increment('a_metric')
|
850
|
+
agent.increment('another_metric')
|
851
|
+
end
|
852
|
+
agent.flush(true)
|
853
|
+
wait do
|
854
|
+
expect(server.commands.grep(/_metric/).size).to eq(2)
|
855
|
+
aggregated_metric = server.commands.grep(/a_metric/).first.split(" ")
|
856
|
+
expect(aggregated_metric[2].to_i).to eq(2) # value
|
857
|
+
expect(aggregated_metric[4].to_i).to eq(2) # count
|
858
|
+
end
|
859
|
+
end
|
860
|
+
|
861
|
+
it "aggregates to the specified frequency within the aggregator" do
|
862
|
+
Timecop.travel(start_of_minute)
|
863
|
+
agent.frequency = 15
|
864
|
+
expect(agent.frequency).not_to be(Instrumental::Agent::DEFAULT_FREQUENCY)
|
865
|
+
agent.increment('metric', 1, Time.at(0))
|
866
|
+
|
867
|
+
# will get aligned to the closest frequency (15)
|
868
|
+
agent.increment('metric', 1, Time.at(20))
|
869
|
+
wait do
|
870
|
+
expect(agent.instance_variable_get(:@event_aggregator).values.keys).to eq(["metric:0", "metric:15"])
|
871
|
+
end
|
872
|
+
agent.flush
|
873
|
+
wait do
|
874
|
+
expect(server.commands.grep(/metric 1 0/).size).to eq(1)
|
875
|
+
expect(server.commands.grep(/metric 1 15/).size).to eq(1)
|
876
|
+
end
|
877
|
+
end
|
878
|
+
|
879
|
+
it "flushes data from both queues before sending" do
|
880
|
+
Timecop.freeze do
|
881
|
+
100.times do |i|
|
882
|
+
agent.increment("test_metric_#{i}")
|
883
|
+
agent.increment("other_metric")
|
884
|
+
end
|
885
|
+
end
|
886
|
+
|
887
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to be > 0
|
888
|
+
agent.flush
|
889
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
890
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
891
|
+
|
892
|
+
wait do
|
893
|
+
expect(server.commands.grep(/test_metric/).size).to eq(100)
|
894
|
+
expect(server.commands.grep(/other_metric/).size).to eq(1)
|
895
|
+
end
|
896
|
+
end
|
897
|
+
|
898
|
+
it "does not batch notices" do
|
899
|
+
agent.frequency = 60
|
900
|
+
agent.notice "things are happening", 0, 100
|
901
|
+
agent.notice "things are happening", 0, 100
|
902
|
+
agent.notice "things are happening", 0, 100
|
903
|
+
wait do
|
904
|
+
expect(server.commands.grep(/things are happening/).size).to eq(3)
|
905
|
+
end
|
906
|
+
end
|
907
|
+
|
908
|
+
it "can be disabled by setting frequency to nil" do
|
909
|
+
agent.frequency = nil
|
910
|
+
expect(EventAggregator).not_to receive(:new)
|
911
|
+
agent.increment('metric')
|
912
|
+
wait do
|
913
|
+
expect(server.commands.grep(/metric/).size).to eq(1)
|
914
|
+
end
|
915
|
+
end
|
916
|
+
|
917
|
+
it "can be disabled by setting frequency to 0" do
|
918
|
+
agent.frequency = 0
|
919
|
+
expect(EventAggregator).not_to receive(:new)
|
920
|
+
agent.increment('metric')
|
921
|
+
wait do
|
922
|
+
expect(server.commands.grep(/metric/).size).to eq(1)
|
923
|
+
end
|
924
|
+
end
|
925
|
+
|
926
|
+
it "automatically uses the highest-without-going-over frequency for a bad frequency" do
|
927
|
+
agent.frequency = 17
|
928
|
+
expect(agent.frequency).to eq(15)
|
929
|
+
agent.frequency = 69420
|
930
|
+
expect(agent.frequency).to eq(60)
|
931
|
+
agent.frequency = 0
|
932
|
+
expect(agent.frequency).to eq(0)
|
933
|
+
agent.frequency = -1
|
934
|
+
expect(agent.frequency).to eq(0)
|
935
|
+
end
|
936
|
+
|
937
|
+
it "can take strings as frequency" do
|
938
|
+
agent = Instrumental::Agent.new('test_token', :frequency => "15")
|
939
|
+
expect(agent.frequency).to eq(15)
|
940
|
+
end
|
941
|
+
|
942
|
+
it "should not be enabled at the same time as synchronous" do
|
943
|
+
expect(Instrumental::Agent.logger).to receive(:warn).with(/Synchronous and Frequency should not be enabled at the same time! Defaulting to synchronous mode./)
|
944
|
+
agent = Instrumental::Agent.new('test_token', :synchronous => true, :frequency => 6)
|
945
|
+
expect(agent.synchronous).to eq(true)
|
946
|
+
expect(agent.frequency).to eq(0)
|
947
|
+
end
|
948
|
+
|
949
|
+
it "should use synchronous mode if it is enabled, even if turned on after frequency set at start" do
|
950
|
+
agent.increment('metric')
|
951
|
+
agent.increment('metric')
|
952
|
+
agent.synchronous = true
|
953
|
+
agent.increment('metric')
|
954
|
+
wait do
|
955
|
+
expect(server.commands.grep(/metric 1/).size).to eq(1)
|
956
|
+
end
|
957
|
+
agent.flush
|
958
|
+
wait do
|
959
|
+
expect(server.commands.grep(/metric 1/).size).to eq(1)
|
960
|
+
expect(server.commands.grep(/metric 2/).size).to eq(1)
|
961
|
+
end
|
962
|
+
end
|
963
|
+
|
964
|
+
it "sends aggregated metrics after specified frequency, even if no flush is sent" do
|
965
|
+
agent.frequency = 1
|
966
|
+
Timecop.travel(start_of_minute)
|
967
|
+
agent.increment('metric')
|
968
|
+
agent.increment('metric')
|
969
|
+
agent.gauge('other', 1)
|
970
|
+
agent.gauge('other', 1)
|
971
|
+
agent.gauge('other', 1)
|
972
|
+
sleep (0.5)
|
973
|
+
wait { expect(server.commands.grep(/metric/).size).to eq(0) }
|
974
|
+
sleep (0.51) # total sleep > 1 frequency
|
975
|
+
|
976
|
+
expect(server.commands.grep(/metric 2/).size).to eq(1)
|
977
|
+
expect(server.commands.grep(/other 3/).size).to eq(1)
|
978
|
+
end
|
979
|
+
|
980
|
+
# this test really relies on the worker threads not working unexpectedly
|
981
|
+
it "will overflow if the aggregator queue is full" do
|
982
|
+
Timecop.travel(start_of_minute)
|
983
|
+
with_constants('Instrumental::Agent::MAX_BUFFER' => 3) do
|
984
|
+
allow(agent.logger).to receive(:debug)
|
985
|
+
expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_test 4 300 1")
|
986
|
+
agent.increment('overflow_test', 4, 300, 1)
|
987
|
+
agent.increment('overflow_test', 4, 300, 1)
|
988
|
+
agent.increment('overflow_test', 4, 300, 1)
|
989
|
+
agent.increment('overflow_test', 4, 300, 1)
|
990
|
+
|
991
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(3)
|
992
|
+
agent.flush
|
993
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
994
|
+
end
|
995
|
+
end
|
996
|
+
|
997
|
+
it "if aggregator is at max size, next command will force a forward to the sender thread" do
|
998
|
+
Timecop.travel(start_of_minute)
|
999
|
+
with_constants('Instrumental::Agent::MAX_AGGREGATOR_SIZE' => 3) do
|
1000
|
+
agent.increment('overflow_test1')
|
1001
|
+
agent.increment('overflow_test2')
|
1002
|
+
agent.increment('overflow_test3')
|
1003
|
+
agent.increment('overflow_test4')
|
1004
|
+
agent.increment('overflow_test5')
|
1005
|
+
|
1006
|
+
# only 1 because the 5th command triggers a forward of the first 4
|
1007
|
+
wait do
|
1008
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(1)
|
1009
|
+
end
|
1010
|
+
agent.flush
|
1011
|
+
wait do
|
1012
|
+
expect(server.commands.grep(/overflow_test/).size).to eq(5)
|
1013
|
+
end
|
1014
|
+
end
|
1015
|
+
end
|
1016
|
+
|
1017
|
+
context do
|
1018
|
+
let(:listen) { false }
|
1019
|
+
it "will not send aggregators to the sender queue if the sender thread is not ready" do
|
1020
|
+
Timecop.travel(start_of_minute)
|
1021
|
+
agent.frequency = 1
|
1022
|
+
|
1023
|
+
with_constants('Instrumental::Agent::MAX_BUFFER' => 3,
|
1024
|
+
'Instrumental::Agent::MAX_AGGREGATOR_SIZE' => 4) do
|
1025
|
+
|
1026
|
+
# fill the queue
|
1027
|
+
agent.increment('overflow_test1')
|
1028
|
+
agent.increment('overflow_test2')
|
1029
|
+
agent.increment('overflow_test3')
|
1030
|
+
|
1031
|
+
# wait until they are all in the aggregator
|
1032
|
+
wait do
|
1033
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
1034
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
|
1035
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
1036
|
+
end
|
1037
|
+
|
1038
|
+
# fill the queue again
|
1039
|
+
agent.increment('overflow_test1')
|
1040
|
+
agent.increment('overflow_test2')
|
1041
|
+
agent.increment('overflow_test3')
|
1042
|
+
|
1043
|
+
# wait until they are all in the aggregator
|
1044
|
+
wait do
|
1045
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
1046
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
|
1047
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
1048
|
+
end
|
1049
|
+
|
1050
|
+
# wait for the aggregator to get forwarded and popped by the sender
|
1051
|
+
wait do
|
1052
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
1053
|
+
expect(agent.instance_variable_get(:@event_aggregator)).to eq(nil)
|
1054
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
|
1055
|
+
end
|
1056
|
+
|
1057
|
+
# fill the queue again
|
1058
|
+
agent.increment('overflow_test4')
|
1059
|
+
agent.increment('overflow_test5')
|
1060
|
+
agent.increment('overflow_test6')
|
1061
|
+
|
1062
|
+
# wait for them all to be in the aggregator
|
1063
|
+
wait do
|
1064
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
1065
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
|
1066
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
|
1067
|
+
end
|
1068
|
+
|
1069
|
+
# sleep until the next forward is done
|
1070
|
+
sleep(agent.frequency + 0.1)
|
1071
|
+
|
1072
|
+
# fill the queue again
|
1073
|
+
agent.increment('overflow_test7')
|
1074
|
+
agent.increment('overflow_test8')
|
1075
|
+
agent.increment('overflow_test9')
|
1076
|
+
|
1077
|
+
# because sending is blocked, the previous aggregator never sent
|
1078
|
+
# when it hits max size, the aggregator queue starts backing up
|
1079
|
+
wait do
|
1080
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(1)
|
1081
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(5)
|
1082
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
|
1083
|
+
end
|
1084
|
+
|
1085
|
+
# send 3 more items, to overflow the aggregator queue
|
1086
|
+
allow(agent.logger).to receive(:debug)
|
1087
|
+
expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_testc 4 300 1")
|
1088
|
+
agent.increment('overflow_testa')
|
1089
|
+
agent.increment('overflow_testb')
|
1090
|
+
agent.increment('overflow_testc', 4, 300, 1) # will get dropped
|
1091
|
+
|
1092
|
+
wait do
|
1093
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(3)
|
1094
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(5)
|
1095
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
|
1096
|
+
end
|
1097
|
+
end
|
1098
|
+
end
|
1099
|
+
end
|
1100
|
+
end
|
1101
|
+
end
|
805
1102
|
end
|
806
1103
|
end
|
807
1104
|
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
|
4
|
+
describe Instrumental::Command, "basic functions of command structs" do
|
5
|
+
it "should not allow bad arguments to command#+" do
|
6
|
+
command = Instrumental::Command.new("gauge", "abc", 1, Time.at(0), 1)
|
7
|
+
|
8
|
+
# nil is a no-op
|
9
|
+
expect(command + nil).to eq(command)
|
10
|
+
# it will change the type of the other command
|
11
|
+
expect(command + Instrumental::Command.new("increment", "abc", 1, Time.at(0), 1))
|
12
|
+
.to eq(Instrumental::Command.new("gauge", "abc", 2, Time.at(0), 2))
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should add together with like commands" do
|
16
|
+
command = Instrumental::Command.new("gauge", "abc", 1, Time.at(0), 1)
|
17
|
+
other = Instrumental::Command.new("gauge", "abc", 2, Time.at(0), 4)
|
18
|
+
expect(command + other).to eq(Instrumental::Command.new("gauge", "abc", 3, Time.at(0), 5))
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe EventAggregator, "time and frequency operations" do
|
4
|
+
it "should massage time values to match the start of a window" do
|
5
|
+
agg = EventAggregator.new(frequency: 10)
|
6
|
+
Timecop.freeze do
|
7
|
+
start_of_minute = Time.now.to_i - (Time.now.to_i % 60)
|
8
|
+
times_to_report = [start_of_minute + 5, start_of_minute + 15]
|
9
|
+
|
10
|
+
times_to_report.each do |at_time|
|
11
|
+
agg.put(Instrumental::Command.new("gauge", "abc", 5, Time.at(at_time), 1))
|
12
|
+
end
|
13
|
+
|
14
|
+
expect(agg.size).to eq(2)
|
15
|
+
|
16
|
+
expected_values = [Instrumental::Command.new("gauge", "abc", 5, Time.at(start_of_minute), 1),
|
17
|
+
Instrumental::Command.new("gauge", "abc", 5, Time.at(start_of_minute + 10), 1)]
|
18
|
+
expect(agg.values.values).to eq(expected_values)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
describe EventAggregator do
|
24
|
+
it "should aggregate put operations to a given frequency" do
|
25
|
+
start_of_minute = Time.now.to_i - (Time.now.to_i % 60)
|
26
|
+
Timecop.freeze(Time.at(start_of_minute)) do
|
27
|
+
agg = EventAggregator.new(frequency: 30)
|
28
|
+
(Time.now.to_i..(Time.now.to_i + 119)).each do |time|
|
29
|
+
agg.put(Instrumental::Command.new("increment", "abc", 1, time, 1))
|
30
|
+
end
|
31
|
+
expect(agg.size).to eq(4)
|
32
|
+
(Time.now.to_i..(Time.now.to_i + 119)).step(30).map do |time|
|
33
|
+
expect(agg.values["abc:#{time}"]).to eq(Instrumental::Command.new("increment", "abc", 30, time, 30))
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should aggregate put operations to the same metric and last type wins" do
|
39
|
+
Timecop.freeze do
|
40
|
+
agg = EventAggregator.new(frequency: 6)
|
41
|
+
|
42
|
+
agg.put(Instrumental::Command.new("gauge", "hello", 3.0, Time.now, 1))
|
43
|
+
agg.put(Instrumental::Command.new("increment", "hello", 4.0, Time.now, 1))
|
44
|
+
|
45
|
+
expect(agg.size).to eq(1)
|
46
|
+
expect(agg.values.values.first).to eq(Instrumental::Command.new("increment",
|
47
|
+
"hello",
|
48
|
+
7.0,
|
49
|
+
agg.coerce_time(Time.now),
|
50
|
+
2))
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: instrumental_agent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0.
|
4
|
+
version: 3.0.0.beta
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Expected Behavior
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-10-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: metrician
|
@@ -118,12 +118,16 @@ files:
|
|
118
118
|
- lib/instrumental/capistrano.rb
|
119
119
|
- lib/instrumental/capistrano/capistrano2.rb
|
120
120
|
- lib/instrumental/capistrano/capistrano3.rake
|
121
|
+
- lib/instrumental/command_structs.rb
|
122
|
+
- lib/instrumental/event_aggregator.rb
|
121
123
|
- lib/instrumental/system_timer.rb
|
122
124
|
- lib/instrumental/version.rb
|
123
125
|
- lib/instrumental_agent.rb
|
124
126
|
- script/setup
|
125
127
|
- script/test
|
126
128
|
- spec/agent_spec.rb
|
129
|
+
- spec/command_struct_specs.rb
|
130
|
+
- spec/event_aggregator_spec.rb
|
127
131
|
- spec/spec_helper.rb
|
128
132
|
- spec/test.crt
|
129
133
|
- spec/test.csr
|
@@ -133,7 +137,7 @@ homepage: http://github.com/instrumental/instrumental_agent-ruby
|
|
133
137
|
licenses:
|
134
138
|
- MIT
|
135
139
|
metadata: {}
|
136
|
-
post_install_message:
|
140
|
+
post_install_message:
|
137
141
|
rdoc_options: []
|
138
142
|
require_paths:
|
139
143
|
- lib
|
@@ -141,7 +145,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
141
145
|
requirements:
|
142
146
|
- - ">="
|
143
147
|
- !ruby/object:Gem::Version
|
144
|
-
version: 2.
|
148
|
+
version: 2.5.7
|
145
149
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
146
150
|
requirements:
|
147
151
|
- - ">"
|
@@ -149,11 +153,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
149
153
|
version: 1.3.1
|
150
154
|
requirements: []
|
151
155
|
rubygems_version: 3.0.3
|
152
|
-
signing_key:
|
156
|
+
signing_key:
|
153
157
|
specification_version: 4
|
154
158
|
summary: Custom metric monitoring for Ruby applications via Instrumental
|
155
159
|
test_files:
|
156
160
|
- spec/agent_spec.rb
|
161
|
+
- spec/command_struct_specs.rb
|
162
|
+
- spec/event_aggregator_spec.rb
|
157
163
|
- spec/spec_helper.rb
|
158
164
|
- spec/test.crt
|
159
165
|
- spec/test.csr
|