instrumental_agent 2.0.0 → 3.0.0.beta3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.ruby-version +1 -1
- data/.travis.yml +4 -5
- data/CHANGELOG.md +15 -0
- data/Gemfile +1 -6
- data/README.md +22 -0
- data/instrumental_agent.gemspec +2 -2
- data/lib/instrumental/agent.rb +277 -163
- data/lib/instrumental/capistrano.rb +4 -46
- data/lib/instrumental/capistrano/capistrano2.rb +47 -0
- data/lib/instrumental/capistrano/capistrano3.rake +56 -0
- data/lib/instrumental/command_structs.rb +32 -0
- data/lib/instrumental/event_aggregator.rb +28 -0
- data/lib/instrumental/version.rb +1 -1
- data/spec/agent_spec.rb +419 -43
- data/spec/command_struct_specs.rb +20 -0
- data/spec/event_aggregator_spec.rb +53 -0
- data/spec/spec_helper.rb +9 -0
- metadata +35 -34
- data/certs/equifax.ca.pem +0 -69
- data/certs/geotrust.ca.pem +0 -80
- data/certs/rapidssl.ca.pem +0 -94
@@ -1,47 +1,5 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
if Capistrano::Configuration.instance
|
6
|
-
Capistrano::Configuration.instance.load do
|
7
|
-
namespace :instrumental do
|
8
|
-
namespace :util do
|
9
|
-
desc "marker for beginning of deploy"
|
10
|
-
task :deploy_start do
|
11
|
-
set :instrumental_deploy_start, Time.now
|
12
|
-
end
|
13
|
-
|
14
|
-
desc "marker for end of deploy"
|
15
|
-
task :deploy_end do
|
16
|
-
set :instrumental_deploy_end, Time.now
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
desc "send a notice to instrumental about the deploy"
|
21
|
-
task :record_deploy_notice do
|
22
|
-
start_at = exists?(:instrumental_deploy_start) ? instrumental_deploy_start : Time.now
|
23
|
-
end_at = exists?(:instrumental_deploy_end) ? instrumental_deploy_end : start_at
|
24
|
-
deploy_duration_in_seconds = end_at - start_at
|
25
|
-
deployer = Etc.getlogin.chomp
|
26
|
-
agent_options = { :synchronous => true }
|
27
|
-
agent_options[:collector] = instrumental_host if exists?(:instrumental_host)
|
28
|
-
agent = Instrumental::Agent.new(instrumental_key, agent_options)
|
29
|
-
message = if exists?(:deploy_message)
|
30
|
-
deploy_message
|
31
|
-
else
|
32
|
-
"#{deployer} deployed #{current_revision}"
|
33
|
-
end
|
34
|
-
agent.notice(message,
|
35
|
-
start_at,
|
36
|
-
deploy_duration_in_seconds)
|
37
|
-
logger.info("Notified Instrumental of deployment")
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
before "deploy", "instrumental:util:deploy_start"
|
42
|
-
after "deploy", "instrumental:util:deploy_end"
|
43
|
-
before "deploy:migrations", "instrumental:util:deploy_start"
|
44
|
-
after "deploy:migrations", "instrumental:util:deploy_end"
|
45
|
-
after "instrumental:util:deploy_end", "instrumental:record_deploy_notice"
|
46
|
-
end
|
1
|
+
if Gem::Specification.find_by_name("capistrano").version >= Gem::Version.new("3.0.0")
|
2
|
+
load File.expand_path("../capistrano/capistrano3.rake", __FILE__)
|
3
|
+
else
|
4
|
+
require_relative "capistrano/capistrano2"
|
47
5
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require "etc"
|
2
|
+
require "instrumental_agent"
|
3
|
+
|
4
|
+
Capistrano::Configuration.instance.load do
|
5
|
+
_cset(:instrumental_hooks) { true }
|
6
|
+
_cset(:instrumental_key) { nil }
|
7
|
+
_cset(:deployer) { Etc.getlogin.chomp }
|
8
|
+
|
9
|
+
if fetch(:instrumental_hooks)
|
10
|
+
before "deploy", "instrumental:util:deploy_start"
|
11
|
+
after "deploy", "instrumental:util:deploy_end"
|
12
|
+
before "deploy:migrations", "instrumental:util:deploy_start"
|
13
|
+
after "deploy:migrations", "instrumental:util:deploy_end"
|
14
|
+
after "instrumental:util:deploy_end", "instrumental:record_deploy_notice"
|
15
|
+
end
|
16
|
+
|
17
|
+
namespace :instrumental do
|
18
|
+
namespace :util do
|
19
|
+
desc "marker for beginning of deploy"
|
20
|
+
task :deploy_start do
|
21
|
+
set :instrumental_deploy_start, Time.now
|
22
|
+
end
|
23
|
+
|
24
|
+
desc "marker for end of deploy"
|
25
|
+
task :deploy_end do
|
26
|
+
set :instrumental_deploy_end, Time.now
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
desc "send a notice to instrumental about the deploy"
|
31
|
+
task :record_deploy_notice do
|
32
|
+
start_at = fetch(:instrumental_deploy_start, Time.now)
|
33
|
+
end_at = fetch(:instrumental_deploy_end, start_at)
|
34
|
+
deploy_duration_in_seconds = end_at - start_at
|
35
|
+
deployer = fetch(:deployer)
|
36
|
+
agent_options = { :synchronous => true }
|
37
|
+
agent_options[:collector] = instrumental_host if fetch(:instrumental_host, false)
|
38
|
+
agent = Instrumental::Agent.new(fetch(:instrumental_key), agent_options)
|
39
|
+
message = fetch(:deploy_message, "#{deployer} deployed #{current_revision}")
|
40
|
+
|
41
|
+
agent.notice(message,
|
42
|
+
start_at,
|
43
|
+
deploy_duration_in_seconds)
|
44
|
+
logger.info("Notified Instrumental of deployment")
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require "etc"
|
2
|
+
require "instrumental_agent"
|
3
|
+
|
4
|
+
namespace :load do
|
5
|
+
task :defaults do
|
6
|
+
set :instrumental_hooks, true
|
7
|
+
set :instrumental_key, nil
|
8
|
+
set :deployer, Etc.getlogin.chomp
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
namespace :deploy do
|
13
|
+
before :starting, :check_instrumental_hooks do
|
14
|
+
invoke "instrumental:util:add_hooks" if fetch(:instrumental_hooks)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
namespace :instrumental do
|
19
|
+
namespace :util do
|
20
|
+
desc "add instrumental hooks to deploy"
|
21
|
+
task :add_hooks do
|
22
|
+
before "deploy", "instrumental:util:deploy_start"
|
23
|
+
after "deploy", "instrumental:util:deploy_end"
|
24
|
+
after "instrumental:util:deploy_end", "instrumental:record_deploy_notice"
|
25
|
+
end
|
26
|
+
|
27
|
+
desc "marker for beginning of deploy"
|
28
|
+
task :deploy_start do
|
29
|
+
set :instrumental_deploy_start, Time.now
|
30
|
+
end
|
31
|
+
|
32
|
+
desc "marker for end of deploy"
|
33
|
+
task :deploy_end do
|
34
|
+
set :instrumental_deploy_end, Time.now
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
desc "send a notice to instrumental about the deploy"
|
39
|
+
task :record_deploy_notice do
|
40
|
+
start_at = fetch(:instrumental_deploy_start, Time.now)
|
41
|
+
end_at = fetch(:instrumental_deploy_end, start_at)
|
42
|
+
deploy_duration_in_seconds = end_at - start_at
|
43
|
+
deployer = fetch(:deployer)
|
44
|
+
agent_options = { :synchronous => true }
|
45
|
+
agent_options[:collector] = instrumental_host if fetch(:instrumental_host, false)
|
46
|
+
message = fetch(:deploy_message, "#{deployer} deployed #{fetch(:current_revision)}".strip)
|
47
|
+
|
48
|
+
if fetch(:instrumental_key)
|
49
|
+
agent = Instrumental::Agent.new(fetch(:instrumental_key), agent_options)
|
50
|
+
agent.notice(message,
|
51
|
+
start_at,
|
52
|
+
deploy_duration_in_seconds)
|
53
|
+
puts "Notified Instrumental of deployment"
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Instrumental
|
2
|
+
METRIC_TYPES = ["increment".freeze, "gauge".freeze].freeze
|
3
|
+
|
4
|
+
Command = Struct.new(:command, :metric, :value, :time, :count) do
|
5
|
+
def initialize(command, metric, value, time, count)
|
6
|
+
super(command, metric, value, time.to_i, count.to_i)
|
7
|
+
end
|
8
|
+
|
9
|
+
def to_s
|
10
|
+
[command, metric, value, time, count].map(&:to_s).join(" ")
|
11
|
+
end
|
12
|
+
|
13
|
+
def metadata
|
14
|
+
"#{metric}:#{time}".freeze
|
15
|
+
end
|
16
|
+
|
17
|
+
def +(other_command)
|
18
|
+
return self if other_command.nil?
|
19
|
+
Command.new(command, metric, value + other_command.value, time, count + other_command.count)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
Notice = Struct.new(:note, :time, :duration) do
|
24
|
+
def initialize(note, time, duration)
|
25
|
+
super(note, time.to_i, duration.to_i)
|
26
|
+
end
|
27
|
+
|
28
|
+
def to_s
|
29
|
+
["notice".freeze, time, duration, note].map(&:to_s).join(" ")
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Instrumental
|
2
|
+
class EventAggregator
|
3
|
+
attr_accessor :counts, :values, :received_at, :frequency
|
4
|
+
|
5
|
+
def initialize(frequency:)
|
6
|
+
@values = Hash.new
|
7
|
+
@frequency = frequency
|
8
|
+
end
|
9
|
+
|
10
|
+
def put(command)
|
11
|
+
command_at = command.time
|
12
|
+
unless(command_at % frequency == 0)
|
13
|
+
command.time = (command_at - (command_at % frequency))
|
14
|
+
end
|
15
|
+
metadata = command.metadata
|
16
|
+
@values[metadata] = (command + @values[metadata])
|
17
|
+
end
|
18
|
+
|
19
|
+
def size
|
20
|
+
@values.size
|
21
|
+
end
|
22
|
+
|
23
|
+
def coerce_time(time)
|
24
|
+
itime = time.to_i
|
25
|
+
(itime - (itime % frequency)).to_i
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/lib/instrumental/version.rb
CHANGED
data/spec/agent_spec.rb
CHANGED
@@ -9,6 +9,8 @@ def wait(n=0.2, &block)
|
|
9
9
|
if (Time.now - start) < 5
|
10
10
|
sleep n
|
11
11
|
retry
|
12
|
+
else
|
13
|
+
raise ex
|
12
14
|
end
|
13
15
|
end
|
14
16
|
else
|
@@ -37,7 +39,8 @@ shared_examples "Instrumental Agent" do
|
|
37
39
|
let(:token) { 'test_token' }
|
38
40
|
let(:address) { server.host_and_port }
|
39
41
|
let(:metrician) { false }
|
40
|
-
let(:
|
42
|
+
let(:frequency) { 0 }
|
43
|
+
let(:agent) { Instrumental::Agent.new(token, :collector => address, :synchronous => synchronous, :enabled => enabled, :secure => secure?, :verify_cert => verify_cert?, :metrician => metrician, :frequency => frequency) }
|
41
44
|
|
42
45
|
# Server options
|
43
46
|
let(:listen) { true }
|
@@ -45,6 +48,12 @@ shared_examples "Instrumental Agent" do
|
|
45
48
|
let(:authenticate) { true }
|
46
49
|
let(:server) { TestServer.new(:listen => listen, :authenticate => authenticate, :response => response, :secure => secure?) }
|
47
50
|
|
51
|
+
# Time Travel Options
|
52
|
+
let(:start_of_minute) do
|
53
|
+
now = Time.now.to_i
|
54
|
+
Time.at(now - (now % 60))
|
55
|
+
end
|
56
|
+
|
48
57
|
before do
|
49
58
|
Instrumental::Agent.logger.level = Logger::UNKNOWN
|
50
59
|
@server = server
|
@@ -226,16 +235,16 @@ shared_examples "Instrumental Agent" do
|
|
226
235
|
allow(agent.logger).to receive(:debug)
|
227
236
|
expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_test 4 300 1")
|
228
237
|
expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_test 5 300 1")
|
229
|
-
5
|
230
|
-
agent.increment('overflow_test', i
|
231
|
-
end
|
232
|
-
wait do
|
233
|
-
expect(server.commands).to include("increment overflow_test 1 300 1")
|
234
|
-
expect(server.commands).to include("increment overflow_test 2 300 1")
|
235
|
-
expect(server.commands).to include("increment overflow_test 3 300 1")
|
236
|
-
expect(server.commands).to_not include("increment overflow_test 4 300 1")
|
237
|
-
expect(server.commands).to_not include("increment overflow_test 5 300 1")
|
238
|
+
1.upto(5) do |i|
|
239
|
+
agent.increment('overflow_test', i, 300)
|
238
240
|
end
|
241
|
+
|
242
|
+
wait
|
243
|
+
expect(agent.sender_queue.size).to eq(3)
|
244
|
+
expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 1 300 1")
|
245
|
+
expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 2 300 1")
|
246
|
+
expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 3 300 1")
|
247
|
+
expect(agent.sender_queue.size).to eq(0)
|
239
248
|
end
|
240
249
|
end
|
241
250
|
end
|
@@ -246,7 +255,7 @@ shared_examples "Instrumental Agent" do
|
|
246
255
|
5.times do |i|
|
247
256
|
agent.increment('overflow_test', i + 1, 300)
|
248
257
|
end
|
249
|
-
expect(agent.instance_variable_get(:@
|
258
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
250
259
|
wait # let the server receive the commands
|
251
260
|
expect(server.commands).to include("increment overflow_test 1 300 1")
|
252
261
|
expect(server.commands).to include("increment overflow_test 2 300 1")
|
@@ -262,8 +271,10 @@ shared_examples "Instrumental Agent" do
|
|
262
271
|
fork do
|
263
272
|
agent.increment('fork_reconnect_test', 1, 3) # triggers reconnect
|
264
273
|
end
|
274
|
+
|
265
275
|
wait(1)
|
266
276
|
agent.increment('fork_reconnect_test', 1, 4) # triggers reconnect
|
277
|
+
|
267
278
|
wait(1)
|
268
279
|
expect(server.connect_count).to eq(2)
|
269
280
|
|
@@ -279,17 +290,17 @@ shared_examples "Instrumental Agent" do
|
|
279
290
|
sleep 1
|
280
291
|
}
|
281
292
|
|
282
|
-
|
283
|
-
allow(agent).to receive(:
|
284
|
-
|
293
|
+
run_sender_loop_calls = 0
|
294
|
+
allow(agent).to receive(:run_sender_loop) {
|
295
|
+
run_sender_loop_calls += 1
|
285
296
|
sleep 3 # keep the worker thread alive
|
286
297
|
}
|
287
298
|
|
288
299
|
t = Thread.new { agent.increment("race") }
|
289
300
|
agent.increment("race")
|
290
301
|
wait(2)
|
291
|
-
expect(
|
292
|
-
expect(agent.
|
302
|
+
expect(run_sender_loop_calls).to eq(1)
|
303
|
+
expect(agent.sender_queue.size).to eq(2)
|
293
304
|
end
|
294
305
|
|
295
306
|
it "should never let an exception reach the user" do
|
@@ -312,14 +323,6 @@ shared_examples "Instrumental Agent" do
|
|
312
323
|
expect(agent.increment("test")).to eq(nil)
|
313
324
|
end
|
314
325
|
|
315
|
-
it "should track invalid metrics" do
|
316
|
-
expect(agent.logger).to receive(:warn).with(/%%/)
|
317
|
-
agent.increment(' %% .!#@$%^&*', 1, 1)
|
318
|
-
wait do
|
319
|
-
expect(server.commands.join("\n")).to include("increment agent.invalid_metric")
|
320
|
-
end
|
321
|
-
end
|
322
|
-
|
323
326
|
it "should allow reasonable metric names" do
|
324
327
|
agent.increment('a')
|
325
328
|
agent.increment('a.b')
|
@@ -397,9 +400,9 @@ shared_examples "Instrumental Agent" do
|
|
397
400
|
|
398
401
|
it "should allow flushing pending values to the server" do
|
399
402
|
1.upto(100) { agent.gauge('a', rand(50)) }
|
400
|
-
expect(agent.instance_variable_get(:@
|
403
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to be > 0
|
401
404
|
agent.flush
|
402
|
-
expect(agent.instance_variable_get(:@
|
405
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
403
406
|
wait do
|
404
407
|
expect(server.commands.grep(/^gauge a /).size).to eq(100)
|
405
408
|
end
|
@@ -437,7 +440,7 @@ shared_examples "Instrumental Agent" do
|
|
437
440
|
agent.increment('reconnect_test', 1, 1234)
|
438
441
|
wait
|
439
442
|
# The agent should not have sent the metric yet, the server is not responding
|
440
|
-
expect(agent.
|
443
|
+
expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
|
441
444
|
end
|
442
445
|
|
443
446
|
it "should warn once when buffer is full" do
|
@@ -472,14 +475,14 @@ shared_examples "Instrumental Agent" do
|
|
472
475
|
agent.increment('reconnect_test', 1, 1234)
|
473
476
|
wait
|
474
477
|
# Since server hasn't responded to hello or authenticate, worker thread will not send data
|
475
|
-
expect(agent.
|
478
|
+
expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
|
476
479
|
end
|
477
480
|
end
|
478
481
|
|
479
482
|
context 'server hangup' do
|
480
483
|
it "should cancel the worker thread when the host has hung up" do
|
481
484
|
# Start the background agent thread and let it send one metric successfully
|
482
|
-
agent.gauge('
|
485
|
+
agent.gauge('connection_failure1', 1, 1234)
|
483
486
|
wait do
|
484
487
|
expect(server.commands.grep(/connection_failure/).size).to eq(1)
|
485
488
|
end
|
@@ -487,13 +490,13 @@ shared_examples "Instrumental Agent" do
|
|
487
490
|
server.stop
|
488
491
|
wait
|
489
492
|
# Send one metric to the stopped server
|
490
|
-
agent.gauge('
|
493
|
+
agent.gauge('connection_failure2', 1, 1234)
|
491
494
|
# The agent thread should have stopped running since the network write would
|
492
495
|
# have failed. The queue will still contain the metric that has yet to be sent
|
493
496
|
wait do
|
494
497
|
expect(agent.send(:running?)).to eq(false)
|
495
498
|
end
|
496
|
-
expect(agent.
|
499
|
+
expect(agent.sender_queue.size).to eq(1)
|
497
500
|
end
|
498
501
|
|
499
502
|
it "should restart the worker thread after hanging it up during an unreachable host event" do
|
@@ -512,7 +515,7 @@ shared_examples "Instrumental Agent" do
|
|
512
515
|
wait do
|
513
516
|
expect(agent.send(:running?)).to eq(false)
|
514
517
|
end
|
515
|
-
expect(agent.
|
518
|
+
expect(agent.sender_queue.size).to eq(1)
|
516
519
|
# Start the server back up again
|
517
520
|
server.listen
|
518
521
|
# Sending another metric should kickstart the background worker thread
|
@@ -520,12 +523,76 @@ shared_examples "Instrumental Agent" do
|
|
520
523
|
# The agent should now be running the background thread, and the queue should be empty
|
521
524
|
wait do
|
522
525
|
expect(agent.send(:running?)).to eq(true)
|
523
|
-
expect(agent.
|
526
|
+
expect(agent.sender_queue.size).to eq(0)
|
524
527
|
end
|
525
528
|
end
|
526
529
|
|
527
|
-
|
530
|
+
it "should restart the worker thread after hanging it up during a bad ssl handshake event" do
|
531
|
+
# Start the background agent thread and let it send one metric successfully
|
532
|
+
agent.gauge('connection_failure', 1, 1234)
|
533
|
+
wait do
|
534
|
+
expect(server.commands.grep(/connection_failure/).size).to eq(1)
|
535
|
+
end
|
536
|
+
# Make the agent return the relevant exception on the next connection test
|
537
|
+
test_connection_fail = true
|
538
|
+
tc = agent.method(:test_connection)
|
539
|
+
allow(agent).to receive(:test_connection) do |*args, &block|
|
540
|
+
test_connection_fail ? raise(OpenSSL::SSL::SSLError.new) : tc.call(*args)
|
541
|
+
end
|
542
|
+
|
543
|
+
# Send one metric to the agent
|
544
|
+
agent.gauge('connection_failure', 1, 1234)
|
545
|
+
# The agent thread should have stopped running since the network write would
|
546
|
+
# have failed.
|
547
|
+
wait do
|
548
|
+
expect(agent.send(:running?)).to eq(false)
|
549
|
+
end
|
550
|
+
# The command is not in the queue
|
551
|
+
expect(agent.sender_queue.size).to eq(0)
|
552
|
+
# allow the agent to behave normally
|
553
|
+
test_connection_fail = false
|
554
|
+
# Sending another metric should kickstart the background worker thread
|
555
|
+
agent.gauge('connection_failure', 1, 1234)
|
556
|
+
# The agent should now be running the background thread, and the queue should be empty
|
557
|
+
wait do
|
558
|
+
expect(agent.send(:running?)).to eq(true)
|
559
|
+
expect(agent.sender_queue.size).to eq(0)
|
560
|
+
expect(server.commands.grep(/connection_failure/).size).to eq(2)
|
561
|
+
end
|
562
|
+
end
|
528
563
|
|
564
|
+
it "should accurately count failures so that backoff can work as intended" do
|
565
|
+
# Start the background agent thread and let it send one metric successfully
|
566
|
+
agent.gauge('connection_failure', 1, 1234)
|
567
|
+
wait do
|
568
|
+
expect(server.commands.grep(/connection_failure/).size).to eq(1)
|
569
|
+
end
|
570
|
+
|
571
|
+
# configure test_connection to fail in a way that won't kill the inner loop
|
572
|
+
test_connection_fail = true
|
573
|
+
tc = agent.method(:test_connection)
|
574
|
+
allow(agent).to receive(:test_connection) do |*args, &block|
|
575
|
+
test_connection_fail ? raise("test_connection_fail") : tc.call(*args)
|
576
|
+
end
|
577
|
+
|
578
|
+
# send some metrics
|
579
|
+
agent.gauge('connection_failure_1', 1, 1234)
|
580
|
+
agent.gauge('connection_failure_2', 1, 1234)
|
581
|
+
agent.gauge('connection_failure_3', 1, 1234)
|
582
|
+
wait do
|
583
|
+
expect(agent.instance_variable_get(:@failures)).to be > 0
|
584
|
+
expect(agent.sender_queue.size).to be > 0
|
585
|
+
end
|
586
|
+
|
587
|
+
# let the loop proceed
|
588
|
+
test_connection_fail = false
|
589
|
+
|
590
|
+
wait do
|
591
|
+
expect(agent.send(:running?)).to eq(true)
|
592
|
+
expect(agent.sender_queue.size).to eq(0)
|
593
|
+
end
|
594
|
+
end
|
595
|
+
end
|
529
596
|
|
530
597
|
context 'not authenticating' do
|
531
598
|
# Server will fail all authentication attempts
|
@@ -535,7 +602,7 @@ shared_examples "Instrumental Agent" do
|
|
535
602
|
agent.increment('reconnect_test', 1, 1234)
|
536
603
|
wait
|
537
604
|
# Metrics should not have been sent since all authentication failed
|
538
|
-
expect(agent.
|
605
|
+
expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
|
539
606
|
end
|
540
607
|
end
|
541
608
|
|
@@ -569,20 +636,21 @@ shared_examples "Instrumental Agent" do
|
|
569
636
|
end
|
570
637
|
end
|
571
638
|
|
572
|
-
it "should
|
639
|
+
it "should follow normal exit procedures whether or not there are commands queued" do
|
573
640
|
allow(agent).to receive(:open_socket) { |*args, &block| sleep(5) && block.call }
|
574
|
-
with_constants('Instrumental::Agent::EXIT_FLUSH_TIMEOUT' =>
|
575
|
-
if (pid = fork { agent.increment('foo', 1); agent.
|
641
|
+
with_constants('Instrumental::Agent::EXIT_FLUSH_TIMEOUT' => 1) do
|
642
|
+
if (pid = fork { agent.increment('foo', 1); agent.sender_queue.clear })
|
576
643
|
tm = Time.now.to_f
|
577
644
|
Process.wait(pid)
|
578
645
|
diff = Time.now.to_f - tm
|
579
|
-
expect(diff).to be <
|
646
|
+
expect(diff).to be < 2
|
647
|
+
expect(diff).to be > 1
|
580
648
|
end
|
581
649
|
end
|
582
650
|
end
|
583
651
|
end
|
584
652
|
|
585
|
-
it "should not wait longer than EXIT_FLUSH_TIMEOUT to attempt flushing the socket when disconnecting" do
|
653
|
+
it "should not wait much longer than EXIT_FLUSH_TIMEOUT to attempt flushing the socket when disconnecting" do
|
586
654
|
agent.increment('foo', 1)
|
587
655
|
wait do
|
588
656
|
expect(server.commands.grep(/foo/).size).to eq(1)
|
@@ -598,12 +666,13 @@ shared_examples "Instrumental Agent" do
|
|
598
666
|
raise
|
599
667
|
end
|
600
668
|
end.join
|
601
|
-
end
|
669
|
+
end.at_least(1).times
|
670
|
+
|
602
671
|
with_constants('Instrumental::Agent::EXIT_FLUSH_TIMEOUT' => 3) do
|
603
672
|
tm = Time.now.to_f
|
604
673
|
agent.cleanup
|
605
674
|
diff = Time.now.to_f - tm
|
606
|
-
expect(diff).to be <= 3
|
675
|
+
expect(diff).to be <= 3.1
|
607
676
|
end
|
608
677
|
end
|
609
678
|
|
@@ -658,7 +727,7 @@ shared_examples "Instrumental Agent" do
|
|
658
727
|
expect(agent.send(:running?)).to eq(true)
|
659
728
|
|
660
729
|
# Setup a failure for the next command so we'll break out of the inner
|
661
|
-
# loop in
|
730
|
+
# loop in run_sender_loop causing another call to open_socket
|
662
731
|
test_connection_fail = true
|
663
732
|
tc = agent.method(:test_connection)
|
664
733
|
allow(agent).to receive(:test_connection) { |*args, &block| test_connection_fail ? raise("fail") : tc.call(*args) }
|
@@ -735,6 +804,313 @@ shared_examples "Instrumental Agent" do
|
|
735
804
|
end
|
736
805
|
end
|
737
806
|
end
|
807
|
+
|
808
|
+
describe Instrumental::Agent, "aggregation" do
|
809
|
+
context "aggregation enabled" do
|
810
|
+
let(:frequency) { 2 }
|
811
|
+
|
812
|
+
it "can be enabled at Agent.new time" do
|
813
|
+
expect(agent.frequency).to eq(2)
|
814
|
+
end
|
815
|
+
|
816
|
+
it "can be modified by setting the agent frequency" do
|
817
|
+
agent.frequency = 15
|
818
|
+
expect(agent.frequency).to eq(15)
|
819
|
+
end
|
820
|
+
|
821
|
+
it "is disabled by default" do
|
822
|
+
agent = Instrumental::Agent.new('test_token')
|
823
|
+
expect(agent.frequency.to_f).to eq(0)
|
824
|
+
end
|
825
|
+
|
826
|
+
it "should only allow frequencies that align with minutes" do
|
827
|
+
(-5..100).each do |freq|
|
828
|
+
agent.frequency = freq
|
829
|
+
expect(Instrumental::Agent::VALID_FREQUENCIES).to include(agent.frequency)
|
830
|
+
end
|
831
|
+
end
|
832
|
+
|
833
|
+
it "bypasses aggregator queue entirely for most commands when frequency == 0" do
|
834
|
+
agent.frequency = 0 # this is red - 0 for green
|
835
|
+
expect(Instrumental::EventAggregator).not_to receive(:new)
|
836
|
+
agent.increment('a_metric')
|
837
|
+
end
|
838
|
+
|
839
|
+
it "adds data to the event aggregator and does not immediately send it" do
|
840
|
+
Timecop.travel start_of_minute
|
841
|
+
agent.increment('test')
|
842
|
+
wait do
|
843
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(1)
|
844
|
+
expect(agent.instance_variable_get(:@event_aggregator).values.values.first.metric).to eq('test')
|
845
|
+
end
|
846
|
+
end
|
847
|
+
|
848
|
+
it "batches data before sending" do
|
849
|
+
Timecop.freeze do
|
850
|
+
agent.increment('a_metric')
|
851
|
+
agent.increment('a_metric')
|
852
|
+
agent.increment('another_metric')
|
853
|
+
end
|
854
|
+
agent.flush(true)
|
855
|
+
wait do
|
856
|
+
expect(server.commands.grep(/_metric/).size).to eq(2)
|
857
|
+
aggregated_metric = server.commands.grep(/a_metric/).first.split(" ")
|
858
|
+
expect(aggregated_metric[2].to_i).to eq(2) # value
|
859
|
+
expect(aggregated_metric[4].to_i).to eq(2) # count
|
860
|
+
end
|
861
|
+
end
|
862
|
+
|
863
|
+
it "aggregates to the specified frequency within the aggregator" do
|
864
|
+
Timecop.travel(start_of_minute)
|
865
|
+
agent.frequency = 15
|
866
|
+
expect(agent.frequency).not_to be(Instrumental::Agent::DEFAULT_FREQUENCY)
|
867
|
+
agent.increment('metric', 1, Time.at(0))
|
868
|
+
|
869
|
+
# will get aligned to the closest frequency (15)
|
870
|
+
agent.increment('metric', 1, Time.at(20))
|
871
|
+
wait do
|
872
|
+
expect(agent.instance_variable_get(:@event_aggregator).values.keys).to eq(["metric:0", "metric:15"])
|
873
|
+
end
|
874
|
+
agent.flush
|
875
|
+
wait do
|
876
|
+
expect(server.commands.grep(/metric 1 0/).size).to eq(1)
|
877
|
+
expect(server.commands.grep(/metric 1 15/).size).to eq(1)
|
878
|
+
end
|
879
|
+
end
|
880
|
+
|
881
|
+
it "flushes data from both queues before sending" do
|
882
|
+
Timecop.freeze do
|
883
|
+
100.times do |i|
|
884
|
+
agent.increment("test_metric_#{i}")
|
885
|
+
agent.increment("other_metric")
|
886
|
+
end
|
887
|
+
end
|
888
|
+
|
889
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to be > 0
|
890
|
+
agent.flush
|
891
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
892
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
893
|
+
|
894
|
+
wait do
|
895
|
+
expect(server.commands.grep(/test_metric/).size).to eq(100)
|
896
|
+
expect(server.commands.grep(/other_metric/).size).to eq(1)
|
897
|
+
end
|
898
|
+
end
|
899
|
+
|
900
|
+
it "does not batch notices" do
|
901
|
+
agent.frequency = 60
|
902
|
+
agent.notice "things are happening", 0, 100
|
903
|
+
agent.notice "things are happening", 0, 100
|
904
|
+
agent.notice "things are happening", 0, 100
|
905
|
+
wait do
|
906
|
+
expect(server.commands.grep(/things are happening/).size).to eq(3)
|
907
|
+
end
|
908
|
+
end
|
909
|
+
|
910
|
+
it "can be disabled by setting frequency to nil" do
|
911
|
+
agent.frequency = nil
|
912
|
+
expect(Instrumental::EventAggregator).not_to receive(:new)
|
913
|
+
agent.increment('metric')
|
914
|
+
wait do
|
915
|
+
expect(server.commands.grep(/metric/).size).to eq(1)
|
916
|
+
end
|
917
|
+
end
|
918
|
+
|
919
|
+
it "can be disabled by setting frequency to 0" do
|
920
|
+
agent.frequency = 0
|
921
|
+
expect(Instrumental::EventAggregator).not_to receive(:new)
|
922
|
+
agent.increment('metric')
|
923
|
+
wait do
|
924
|
+
expect(server.commands.grep(/metric/).size).to eq(1)
|
925
|
+
end
|
926
|
+
end
|
927
|
+
|
928
|
+
it "automatically uses the highest-without-going-over frequency for a bad frequency" do
|
929
|
+
agent.frequency = 17
|
930
|
+
expect(agent.frequency).to eq(15)
|
931
|
+
agent.frequency = 69420
|
932
|
+
expect(agent.frequency).to eq(60)
|
933
|
+
agent.frequency = 0
|
934
|
+
expect(agent.frequency).to eq(0)
|
935
|
+
agent.frequency = -1
|
936
|
+
expect(agent.frequency).to eq(0)
|
937
|
+
end
|
938
|
+
|
939
|
+
it "can take strings as frequency" do
|
940
|
+
agent = Instrumental::Agent.new('test_token', :frequency => "15")
|
941
|
+
expect(agent.frequency).to eq(15)
|
942
|
+
end
|
943
|
+
|
944
|
+
it "should not be enabled at the same time as synchronous" do
|
945
|
+
expect(Instrumental::Agent.logger).to receive(:warn).with(/Synchronous and Frequency should not be enabled at the same time! Defaulting to synchronous mode./)
|
946
|
+
agent = Instrumental::Agent.new('test_token', :synchronous => true, :frequency => 6)
|
947
|
+
expect(agent.synchronous).to eq(true)
|
948
|
+
expect(agent.frequency).to eq(0)
|
949
|
+
end
|
950
|
+
|
951
|
+
it "should use synchronous mode if it is enabled, even if turned on after frequency set at start" do
|
952
|
+
agent.increment('metric')
|
953
|
+
agent.increment('metric')
|
954
|
+
agent.synchronous = true
|
955
|
+
agent.increment('metric')
|
956
|
+
wait do
|
957
|
+
expect(server.commands.grep(/metric 1/).size).to eq(1)
|
958
|
+
end
|
959
|
+
agent.flush
|
960
|
+
wait do
|
961
|
+
expect(server.commands.grep(/metric 1/).size).to eq(1)
|
962
|
+
expect(server.commands.grep(/metric 2/).size).to eq(1)
|
963
|
+
end
|
964
|
+
end
|
965
|
+
|
966
|
+
it "sends aggregated metrics after specified frequency, even if no flush is sent" do
|
967
|
+
agent.frequency = 1
|
968
|
+
Timecop.travel(start_of_minute)
|
969
|
+
agent.increment('metric')
|
970
|
+
agent.increment('metric')
|
971
|
+
agent.gauge('other', 1)
|
972
|
+
agent.gauge('other', 1)
|
973
|
+
agent.gauge('other', 1)
|
974
|
+
sleep (0.5)
|
975
|
+
wait { expect(server.commands.grep(/metric/).size).to eq(0) }
|
976
|
+
sleep (0.51) # total sleep > 1 frequency
|
977
|
+
|
978
|
+
expect(server.commands.grep(/metric 2/).size).to eq(1)
|
979
|
+
expect(server.commands.grep(/other 3/).size).to eq(1)
|
980
|
+
end
|
981
|
+
|
982
|
+
it "if aggregator is at max size, next command will force a forward to the sender thread" do
|
983
|
+
Timecop.travel(start_of_minute)
|
984
|
+
with_constants('Instrumental::Agent::MAX_AGGREGATOR_SIZE' => 3) do
|
985
|
+
agent.increment('overflow_test1')
|
986
|
+
agent.increment('overflow_test2')
|
987
|
+
agent.increment('overflow_test3')
|
988
|
+
agent.increment('overflow_test4')
|
989
|
+
agent.increment('overflow_test5')
|
990
|
+
|
991
|
+
# only 1 because the 5th command triggers a forward of the first 4
|
992
|
+
wait do
|
993
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(1)
|
994
|
+
end
|
995
|
+
agent.flush
|
996
|
+
wait do
|
997
|
+
expect(server.commands.grep(/overflow_test/).size).to eq(5)
|
998
|
+
end
|
999
|
+
end
|
1000
|
+
end
|
1001
|
+
|
1002
|
+
context do
|
1003
|
+
let(:listen) { false }
|
1004
|
+
it "will not send aggregators to the sender queue if the sender thread is not ready" do
|
1005
|
+
Timecop.travel(start_of_minute)
|
1006
|
+
agent.frequency = 1
|
1007
|
+
|
1008
|
+
with_constants('Instrumental::Agent::MAX_BUFFER' => 3,
|
1009
|
+
'Instrumental::Agent::MAX_AGGREGATOR_SIZE' => 4) do
|
1010
|
+
|
1011
|
+
# fill the queue
|
1012
|
+
agent.increment('overflow_test1')
|
1013
|
+
agent.increment('overflow_test2')
|
1014
|
+
agent.increment('overflow_test3')
|
1015
|
+
|
1016
|
+
# wait until they are all in the aggregator
|
1017
|
+
wait do
|
1018
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
1019
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
|
1020
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
1021
|
+
end
|
1022
|
+
|
1023
|
+
# fill the queue again
|
1024
|
+
agent.increment('overflow_test1')
|
1025
|
+
agent.increment('overflow_test2')
|
1026
|
+
agent.increment('overflow_test3')
|
1027
|
+
|
1028
|
+
# wait until they are all in the aggregator
|
1029
|
+
wait do
|
1030
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
1031
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
|
1032
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
|
1033
|
+
end
|
1034
|
+
|
1035
|
+
# wait for the aggregator to get forwarded and popped by the sender
|
1036
|
+
wait do
|
1037
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
1038
|
+
expect(agent.instance_variable_get(:@event_aggregator)).to eq(nil)
|
1039
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
|
1040
|
+
end
|
1041
|
+
|
1042
|
+
# fill the queue again
|
1043
|
+
agent.increment('overflow_test4')
|
1044
|
+
agent.increment('overflow_test5')
|
1045
|
+
agent.increment('overflow_test6')
|
1046
|
+
|
1047
|
+
# wait for them all to be in the aggregator
|
1048
|
+
wait do
|
1049
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
|
1050
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
|
1051
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
|
1052
|
+
end
|
1053
|
+
|
1054
|
+
# sleep until the next forward is done
|
1055
|
+
sleep(agent.frequency + 0.1)
|
1056
|
+
|
1057
|
+
# fill the queue again
|
1058
|
+
agent.increment('overflow_test7')
|
1059
|
+
agent.increment('overflow_test8')
|
1060
|
+
agent.increment('overflow_test9')
|
1061
|
+
|
1062
|
+
# because sending is blocked, the previous aggregator was never sent
|
1063
|
+
# when it hits max size, the aggregator queue starts backing up
|
1064
|
+
wait do
|
1065
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(1)
|
1066
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(5)
|
1067
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
|
1068
|
+
end
|
1069
|
+
|
1070
|
+
# send 3 more items, to overflow the aggregator queue
|
1071
|
+
allow(agent.logger).to receive(:debug)
|
1072
|
+
expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_testc 4 300 1")
|
1073
|
+
agent.increment('overflow_testa')
|
1074
|
+
agent.increment('overflow_testb')
|
1075
|
+
agent.increment('overflow_testc', 4, 300, 1) # will get dropped
|
1076
|
+
|
1077
|
+
wait do
|
1078
|
+
expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(3)
|
1079
|
+
expect(agent.instance_variable_get(:@event_aggregator).size).to eq(5)
|
1080
|
+
expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
|
1081
|
+
end
|
1082
|
+
end
|
1083
|
+
end
|
1084
|
+
end
|
1085
|
+
|
1086
|
+
if FORK_SUPPORTED
|
1087
|
+
it "should automatically reconnect when forked when aggregation is enabled" do
|
1088
|
+
Timecop.travel start_of_minute
|
1089
|
+
agent.frequency = 10
|
1090
|
+
|
1091
|
+
agent.increment('fork_reconnect_test1', 1, 0, 1)
|
1092
|
+
fork do
|
1093
|
+
agent.increment('fork_reconnect_test2', 1, 0, 1) # triggers reconnect
|
1094
|
+
exit
|
1095
|
+
end
|
1096
|
+
|
1097
|
+
|
1098
|
+
sleep 1
|
1099
|
+
agent.increment('fork_reconnect_test3', 1, 0, 1) # triggers reconnect
|
1100
|
+
|
1101
|
+
agent.flush
|
1102
|
+
expect(server.connect_count).to eq(2)
|
1103
|
+
|
1104
|
+
wait do
|
1105
|
+
expect(server.commands).to include("increment fork_reconnect_test1 1 0 1")
|
1106
|
+
expect(server.commands).to include("increment fork_reconnect_test2 1 0 1")
|
1107
|
+
expect(server.commands).to include("increment fork_reconnect_test3 1 0 1")
|
1108
|
+
expect(server.commands.grep(/fork_reconnect/).size).to eq(3)
|
1109
|
+
end
|
1110
|
+
end
|
1111
|
+
end
|
1112
|
+
end
|
1113
|
+
end
|
738
1114
|
end
|
739
1115
|
end
|
740
1116
|
|