instrumental_agent 2.0.0.alpha → 3.0.0.beta2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,47 +1,5 @@
1
- require 'capistrano'
2
- require 'instrumental_agent'
3
- require 'etc'
4
-
5
- if Capistrano::Configuration.instance
6
- Capistrano::Configuration.instance.load do
7
- namespace :instrumental do
8
- namespace :util do
9
- desc "marker for beginning of deploy"
10
- task :deploy_start do
11
- set :instrumental_deploy_start, Time.now
12
- end
13
-
14
- desc "marker for end of deploy"
15
- task :deploy_end do
16
- set :instrumental_deploy_end, Time.now
17
- end
18
- end
19
-
20
- desc "send a notice to instrumental about the deploy"
21
- task :record_deploy_notice do
22
- start_at = exists?(:instrumental_deploy_start) ? instrumental_deploy_start : Time.now
23
- end_at = exists?(:instrumental_deploy_end) ? instrumental_deploy_end : start_at
24
- deploy_duration_in_seconds = end_at - start_at
25
- deployer = Etc.getlogin.chomp
26
- agent_options = { :synchronous => true }
27
- agent_options[:collector] = instrumental_host if exists?(:instrumental_host)
28
- agent = Instrumental::Agent.new(instrumental_key, agent_options)
29
- message = if exists?(:deploy_message)
30
- deploy_message
31
- else
32
- "#{deployer} deployed #{current_revision}"
33
- end
34
- agent.notice(message,
35
- start_at,
36
- deploy_duration_in_seconds)
37
- logger.info("Notified Instrumental of deployment")
38
- end
39
- end
40
-
41
- before "deploy", "instrumental:util:deploy_start"
42
- after "deploy", "instrumental:util:deploy_end"
43
- before "deploy:migrations", "instrumental:util:deploy_start"
44
- after "deploy:migrations", "instrumental:util:deploy_end"
45
- after "instrumental:util:deploy_end", "instrumental:record_deploy_notice"
46
- end
1
+ if Gem::Specification.find_by_name("capistrano").version >= Gem::Version.new("3.0.0")
2
+ load File.expand_path("../capistrano/capistrano3.rake", __FILE__)
3
+ else
4
+ require_relative "capistrano/capistrano2"
47
5
  end
@@ -0,0 +1,47 @@
1
+ require "etc"
2
+ require "instrumental_agent"
3
+
4
# Capistrano 2 recipe for Instrumental deploy notices.
#
# Registers default settings, optionally hooks the timing tasks around
# "deploy" and "deploy:migrations", and defines the task that reports the
# finished deploy to Instrumental as a notice.
Capistrano::Configuration.instance.load do
  _cset(:instrumental_hooks) { true }
  _cset(:instrumental_key)   { nil }
  # Etc.getlogin returns nil when no login name can be determined, so coerce
  # to a String before chomping instead of raising NoMethodError.
  _cset(:deployer)           { Etc.getlogin.to_s.chomp }

  # The hooks are wired up while the recipe loads, and only when
  # :instrumental_hooks is truthy at that point.
  if fetch(:instrumental_hooks)
    before "deploy", "instrumental:util:deploy_start"
    after  "deploy", "instrumental:util:deploy_end"
    before "deploy:migrations", "instrumental:util:deploy_start"
    after  "deploy:migrations", "instrumental:util:deploy_end"
    after  "instrumental:util:deploy_end", "instrumental:record_deploy_notice"
  end

  namespace :instrumental do
    namespace :util do
      desc "marker for beginning of deploy"
      task :deploy_start do
        set :instrumental_deploy_start, Time.now
      end

      desc "marker for end of deploy"
      task :deploy_end do
        set :instrumental_deploy_end, Time.now
      end
    end

    desc "send a notice to instrumental about the deploy"
    task :record_deploy_notice do
      # Fall back to "now" / a zero-length deploy when the marker tasks did
      # not run.
      start_at = fetch(:instrumental_deploy_start, Time.now)
      end_at = fetch(:instrumental_deploy_end, start_at)
      deploy_duration_in_seconds = end_at - start_at

      agent_options = { :synchronous => true }
      collector = fetch(:instrumental_host, nil)
      agent_options[:collector] = collector if collector
      agent = Instrumental::Agent.new(fetch(:instrumental_key), agent_options)

      # Build the default message only when no :deploy_message is configured;
      # passing it as a fetch default would evaluate current_revision on
      # every run, even when the result is thrown away.
      message = if exists?(:deploy_message)
                  fetch(:deploy_message)
                else
                  "#{fetch(:deployer)} deployed #{current_revision}"
                end

      agent.notice(message,
                   start_at,
                   deploy_duration_in_seconds)
      logger.info("Notified Instrumental of deployment")
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require "etc"
2
+ require "instrumental_agent"
3
+
4
# Default settings for the Instrumental Capistrano 3 tasks.
namespace :load do
  task :defaults do
    set :instrumental_hooks, true
    set :instrumental_key, nil
    # Etc.getlogin returns nil when no login name can be determined; coerce
    # to a String so loading the defaults never raises NoMethodError.
    set :deployer, Etc.getlogin.to_s.chomp
  end
end
11
+
12
# Ahead of deploy:starting, install the Instrumental hooks unless they were
# switched off through the :instrumental_hooks setting.
namespace :deploy do
  before :starting, :check_instrumental_hooks do
    hooks_enabled = fetch(:instrumental_hooks)
    invoke("instrumental:util:add_hooks") if hooks_enabled
  end
end
17
+
18
# Capistrano 3 tasks that time a deploy and report it to Instrumental.
namespace :instrumental do
  namespace :util do
    desc "add instrumental hooks to deploy"
    task :add_hooks do
      before "deploy", "instrumental:util:deploy_start"
      after  "deploy", "instrumental:util:deploy_end"
      after  "instrumental:util:deploy_end", "instrumental:record_deploy_notice"
    end

    desc "marker for beginning of deploy"
    task :deploy_start do
      set :instrumental_deploy_start, Time.now
    end

    desc "marker for end of deploy"
    task :deploy_end do
      set :instrumental_deploy_end, Time.now
    end
  end

  desc "send a notice to instrumental about the deploy"
  task :record_deploy_notice do
    # Fall back to "now" / a zero-length deploy when the marker tasks did
    # not run.
    start_at = fetch(:instrumental_deploy_start, Time.now)
    end_at = fetch(:instrumental_deploy_end, start_at)
    deploy_duration_in_seconds = end_at - start_at
    deployer = fetch(:deployer)

    agent_options = { :synchronous => true }
    # Settings must be read through fetch here; a bare `instrumental_host`
    # is not a defined method in this context and would raise NameError.
    collector = fetch(:instrumental_host, nil)
    agent_options[:collector] = collector if collector
    message = fetch(:deploy_message, "#{deployer} deployed #{fetch(:current_revision)}".strip)

    # Without an API key there is nothing to report to, so skip quietly.
    api_key = fetch(:instrumental_key)
    if api_key
      agent = Instrumental::Agent.new(api_key, agent_options)
      agent.notice(message,
                   start_at,
                   deploy_duration_in_seconds)
      puts "Notified Instrumental of deployment"
    end
  end
end
@@ -0,0 +1,32 @@
1
module Instrumental
  # Names of the metric commands defined here.
  METRIC_TYPES = %w[increment gauge].each(&:freeze).freeze

  # A single metric sample. +time+ and +count+ are stored as integers.
  Command = Struct.new(:command, :metric, :value, :time, :count) do
    def initialize(command, metric, value, time, count)
      super(command, metric, value, time.to_i, count.to_i)
    end

    # Space-separated text form: "command metric value time count".
    def to_s
      "#{command} #{metric} #{value} #{time} #{count}"
    end

    # Frozen "metric:time" key identifying this sample's metric and timestamp.
    def metadata
      format("%s:%s", metric, time).freeze
    end

    # Combines two samples by summing value and count; command, metric and
    # time are taken from the receiver. Adding nil returns the receiver.
    def +(other_command)
      if other_command.nil?
        self
      else
        Command.new(command, metric, value + other_command.value, time, count + other_command.count)
      end
    end
  end

  # A free-form note with a start time and a duration, both stored as integers.
  Notice = Struct.new(:note, :time, :duration) do
    def initialize(note, time, duration)
      super(note, time.to_i, duration.to_i)
    end

    # Space-separated text form: "notice time duration note".
    def to_s
      "notice #{time} #{duration} #{note}"
    end
  end
end
@@ -0,0 +1,26 @@
1
# Collects commands into buckets keyed by the command's metadata string,
# after rounding each command's time down to a multiple of +frequency+.
class EventAggregator
  attr_accessor :counts, :values, :received_at, :frequency

  # frequency - bucket width used to align command times. nil or 0 disables
  #             alignment, so times are used as given.
  def initialize(frequency:)
    @values = {}
    @frequency = frequency
  end

  # Aligns the command's time (the command is modified in place), then merges
  # it into the entry already stored under the same metadata key.
  #
  # command - object responding to #time, #time=, #metadata and #+.
  #
  # Returns the entry now stored for that key.
  def put(command)
    aligned = coerce_time(command.time)
    command.time = aligned unless aligned == command.time
    key = command.metadata
    @values[key] = command + @values[key]
  end

  # Number of distinct buckets currently held.
  def size
    @values.size
  end

  # Rounds +time+ (anything responding to #to_i) down to the nearest multiple
  # of +frequency+. With a nil or zero frequency the integer time is returned
  # unchanged rather than raising on the modulo.
  def coerce_time(time)
    itime = time.to_i
    return itime if frequency.nil? || frequency.zero?

    (itime - (itime % frequency)).to_i
  end
end
@@ -1,3 +1,3 @@
1
1
  module Instrumental
2
- VERSION = "2.0.0.alpha"
2
+ VERSION = "3.0.0.beta2"
3
3
  end
@@ -9,6 +9,8 @@ def wait(n=0.2, &block)
9
9
  if (Time.now - start) < 5
10
10
  sleep n
11
11
  retry
12
+ else
13
+ raise ex
12
14
  end
13
15
  end
14
16
  else
@@ -37,7 +39,8 @@ shared_examples "Instrumental Agent" do
37
39
  let(:token) { 'test_token' }
38
40
  let(:address) { server.host_and_port }
39
41
  let(:metrician) { false }
40
- let(:agent) { Instrumental::Agent.new(token, :collector => address, :synchronous => synchronous, :enabled => enabled, :secure => secure?, :verify_cert => verify_cert?, :metrician => metrician) }
42
+ let(:frequency) { 0 }
43
+ let(:agent) { Instrumental::Agent.new(token, :collector => address, :synchronous => synchronous, :enabled => enabled, :secure => secure?, :verify_cert => verify_cert?, :metrician => metrician, :frequency => frequency) }
41
44
 
42
45
  # Server options
43
46
  let(:listen) { true }
@@ -45,6 +48,12 @@ shared_examples "Instrumental Agent" do
45
48
  let(:authenticate) { true }
46
49
  let(:server) { TestServer.new(:listen => listen, :authenticate => authenticate, :response => response, :secure => secure?) }
47
50
 
51
+ # Time Travel Options
52
+ let(:start_of_minute) do
53
+ now = Time.now.to_i
54
+ Time.at(now - (now % 60))
55
+ end
56
+
48
57
  before do
49
58
  Instrumental::Agent.logger.level = Logger::UNKNOWN
50
59
  @server = server
@@ -226,16 +235,16 @@ shared_examples "Instrumental Agent" do
226
235
  allow(agent.logger).to receive(:debug)
227
236
  expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_test 4 300 1")
228
237
  expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_test 5 300 1")
229
- 5.times do |i|
230
- agent.increment('overflow_test', i + 1, 300)
231
- end
232
- wait do
233
- expect(server.commands).to include("increment overflow_test 1 300 1")
234
- expect(server.commands).to include("increment overflow_test 2 300 1")
235
- expect(server.commands).to include("increment overflow_test 3 300 1")
236
- expect(server.commands).to_not include("increment overflow_test 4 300 1")
237
- expect(server.commands).to_not include("increment overflow_test 5 300 1")
238
+ 1.upto(5) do |i|
239
+ agent.increment('overflow_test', i, 300)
238
240
  end
241
+
242
+ wait
243
+ expect(agent.sender_queue.size).to eq(3)
244
+ expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 1 300 1")
245
+ expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 2 300 1")
246
+ expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 3 300 1")
247
+ expect(agent.sender_queue.size).to eq(0)
239
248
  end
240
249
  end
241
250
  end
@@ -246,7 +255,7 @@ shared_examples "Instrumental Agent" do
246
255
  5.times do |i|
247
256
  agent.increment('overflow_test', i + 1, 300)
248
257
  end
249
- expect(agent.instance_variable_get(:@queue).size).to eq(0)
258
+ expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
250
259
  wait # let the server receive the commands
251
260
  expect(server.commands).to include("increment overflow_test 1 300 1")
252
261
  expect(server.commands).to include("increment overflow_test 2 300 1")
@@ -262,8 +271,10 @@ shared_examples "Instrumental Agent" do
262
271
  fork do
263
272
  agent.increment('fork_reconnect_test', 1, 3) # triggers reconnect
264
273
  end
274
+
265
275
  wait(1)
266
276
  agent.increment('fork_reconnect_test', 1, 4) # triggers reconnect
277
+
267
278
  wait(1)
268
279
  expect(server.connect_count).to eq(2)
269
280
 
@@ -279,17 +290,17 @@ shared_examples "Instrumental Agent" do
279
290
  sleep 1
280
291
  }
281
292
 
282
- run_worker_loop_calls = 0
283
- allow(agent).to receive(:run_worker_loop) {
284
- run_worker_loop_calls += 1
293
+ run_sender_loop_calls = 0
294
+ allow(agent).to receive(:run_sender_loop) {
295
+ run_sender_loop_calls += 1
285
296
  sleep 3 # keep the worker thread alive
286
297
  }
287
298
 
288
299
  t = Thread.new { agent.increment("race") }
289
300
  agent.increment("race")
290
301
  wait(2)
291
- expect(run_worker_loop_calls).to eq(1)
292
- expect(agent.queue.size).to eq(2)
302
+ expect(run_sender_loop_calls).to eq(1)
303
+ expect(agent.sender_queue.size).to eq(2)
293
304
  end
294
305
 
295
306
  it "should never let an exception reach the user" do
@@ -312,14 +323,6 @@ shared_examples "Instrumental Agent" do
312
323
  expect(agent.increment("test")).to eq(nil)
313
324
  end
314
325
 
315
- it "should track invalid metrics" do
316
- expect(agent.logger).to receive(:warn).with(/%%/)
317
- agent.increment(' %% .!#@$%^&*', 1, 1)
318
- wait do
319
- expect(server.commands.join("\n")).to include("increment agent.invalid_metric")
320
- end
321
- end
322
-
323
326
  it "should allow reasonable metric names" do
324
327
  agent.increment('a')
325
328
  agent.increment('a.b')
@@ -397,9 +400,9 @@ shared_examples "Instrumental Agent" do
397
400
 
398
401
  it "should allow flushing pending values to the server" do
399
402
  1.upto(100) { agent.gauge('a', rand(50)) }
400
- expect(agent.instance_variable_get(:@queue).size).to be > 0
403
+ expect(agent.instance_variable_get(:@sender_queue).size).to be > 0
401
404
  agent.flush
402
- expect(agent.instance_variable_get(:@queue).size).to eq(0)
405
+ expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
403
406
  wait do
404
407
  expect(server.commands.grep(/^gauge a /).size).to eq(100)
405
408
  end
@@ -437,7 +440,7 @@ shared_examples "Instrumental Agent" do
437
440
  agent.increment('reconnect_test', 1, 1234)
438
441
  wait
439
442
  # The agent should not have sent the metric yet, the server is not responding
440
- expect(agent.queue.pop(true)).to include("increment reconnect_test 1 1234 1\n")
443
+ expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
441
444
  end
442
445
 
443
446
  it "should warn once when buffer is full" do
@@ -472,14 +475,14 @@ shared_examples "Instrumental Agent" do
472
475
  agent.increment('reconnect_test', 1, 1234)
473
476
  wait
474
477
  # Since server hasn't responded to hello or authenticate, worker thread will not send data
475
- expect(agent.queue.pop(true)).to include("increment reconnect_test 1 1234 1\n")
478
+ expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
476
479
  end
477
480
  end
478
481
 
479
482
  context 'server hangup' do
480
483
  it "should cancel the worker thread when the host has hung up" do
481
484
  # Start the background agent thread and let it send one metric successfully
482
- agent.gauge('connection_failure', 1, 1234)
485
+ agent.gauge('connection_failure1', 1, 1234)
483
486
  wait do
484
487
  expect(server.commands.grep(/connection_failure/).size).to eq(1)
485
488
  end
@@ -487,13 +490,13 @@ shared_examples "Instrumental Agent" do
487
490
  server.stop
488
491
  wait
489
492
  # Send one metric to the stopped server
490
- agent.gauge('connection_failure', 1, 1234)
493
+ agent.gauge('connection_failure2', 1, 1234)
491
494
  # The agent thread should have stopped running since the network write would
492
495
  # have failed. The queue will still contain the metric that has yet to be sent
493
496
  wait do
494
497
  expect(agent.send(:running?)).to eq(false)
495
498
  end
496
- expect(agent.queue.size).to eq(1)
499
+ expect(agent.sender_queue.size).to eq(1)
497
500
  end
498
501
 
499
502
  it "should restart the worker thread after hanging it up during an unreachable host event" do
@@ -512,7 +515,7 @@ shared_examples "Instrumental Agent" do
512
515
  wait do
513
516
  expect(agent.send(:running?)).to eq(false)
514
517
  end
515
- expect(agent.queue.size).to eq(1)
518
+ expect(agent.sender_queue.size).to eq(1)
516
519
  # Start the server back up again
517
520
  server.listen
518
521
  # Sending another metric should kickstart the background worker thread
@@ -520,12 +523,76 @@ shared_examples "Instrumental Agent" do
520
523
  # The agent should now be running the background thread, and the queue should be empty
521
524
  wait do
522
525
  expect(agent.send(:running?)).to eq(true)
523
- expect(agent.queue.size).to eq(0)
526
+ expect(agent.sender_queue.size).to eq(0)
524
527
  end
525
528
  end
526
529
 
527
- end
530
+ it "should restart the worker thread after hanging it up during a bad ssl handshake event" do
531
+ # Start the background agent thread and let it send one metric successfully
532
+ agent.gauge('connection_failure', 1, 1234)
533
+ wait do
534
+ expect(server.commands.grep(/connection_failure/).size).to eq(1)
535
+ end
536
+ # Make the agent return the relevant exception on the next connection test
537
+ test_connection_fail = true
538
+ tc = agent.method(:test_connection)
539
+ allow(agent).to receive(:test_connection) do |*args, &block|
540
+ test_connection_fail ? raise(OpenSSL::SSL::SSLError.new) : tc.call(*args)
541
+ end
528
542
 
543
+ # Send one metric to the agent
544
+ agent.gauge('connection_failure', 1, 1234)
545
+ # The agent thread should have stopped running since the network write would
546
+ # have failed.
547
+ wait do
548
+ expect(agent.send(:running?)).to eq(false)
549
+ end
550
+ # The command is not in the queue
551
+ expect(agent.sender_queue.size).to eq(0)
552
+ # allow the agent to behave normally
553
+ test_connection_fail = false
554
+ # Sending another metric should kickstart the background worker thread
555
+ agent.gauge('connection_failure', 1, 1234)
556
+ # The agent should now be running the background thread, and the queue should be empty
557
+ wait do
558
+ expect(agent.send(:running?)).to eq(true)
559
+ expect(agent.sender_queue.size).to eq(0)
560
+ expect(server.commands.grep(/connection_failure/).size).to eq(2)
561
+ end
562
+ end
563
+
564
+ it "should accurately count failures so that backoff can work as intended" do
565
+ # Start the background agent thread and let it send one metric successfully
566
+ agent.gauge('connection_failure', 1, 1234)
567
+ wait do
568
+ expect(server.commands.grep(/connection_failure/).size).to eq(1)
569
+ end
570
+
571
+ # configure test_connection to fail in a way that won't kill the inner loop
572
+ test_connection_fail = true
573
+ tc = agent.method(:test_connection)
574
+ allow(agent).to receive(:test_connection) do |*args, &block|
575
+ test_connection_fail ? raise("test_connection_fail") : tc.call(*args)
576
+ end
577
+
578
+ # send some metrics
579
+ agent.gauge('connection_failure_1', 1, 1234)
580
+ agent.gauge('connection_failure_2', 1, 1234)
581
+ agent.gauge('connection_failure_3', 1, 1234)
582
+ wait do
583
+ expect(agent.instance_variable_get(:@failures)).to be > 0
584
+ expect(agent.sender_queue.size).to be > 0
585
+ end
586
+
587
+ # let the loop proceed
588
+ test_connection_fail = false
589
+
590
+ wait do
591
+ expect(agent.send(:running?)).to eq(true)
592
+ expect(agent.sender_queue.size).to eq(0)
593
+ end
594
+ end
595
+ end
529
596
 
530
597
  context 'not authenticating' do
531
598
  # Server will fail all authentication attempts
@@ -535,7 +602,7 @@ shared_examples "Instrumental Agent" do
535
602
  agent.increment('reconnect_test', 1, 1234)
536
603
  wait
537
604
  # Metrics should not have been sent since all authentication failed
538
- expect(agent.queue.pop(true)).to include("increment reconnect_test 1 1234 1\n")
605
+ expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
539
606
  end
540
607
  end
541
608
 
@@ -569,20 +636,21 @@ shared_examples "Instrumental Agent" do
569
636
  end
570
637
  end
571
638
 
572
- it "should not wait to exit a process if there are no commands queued" do
639
+ it "should follow normal exit procedures whether or not there are commands queued" do
573
640
  allow(agent).to receive(:open_socket) { |*args, &block| sleep(5) && block.call }
574
- with_constants('Instrumental::Agent::EXIT_FLUSH_TIMEOUT' => 3) do
575
- if (pid = fork { agent.increment('foo', 1); agent.queue.clear })
641
+ with_constants('Instrumental::Agent::EXIT_FLUSH_TIMEOUT' => 1) do
642
+ if (pid = fork { agent.increment('foo', 1); agent.sender_queue.clear })
576
643
  tm = Time.now.to_f
577
644
  Process.wait(pid)
578
645
  diff = Time.now.to_f - tm
579
- expect(diff).to be < 1
646
+ expect(diff).to be < 2
647
+ expect(diff).to be > 1
580
648
  end
581
649
  end
582
650
  end
583
651
  end
584
652
 
585
- it "should not wait longer than EXIT_FLUSH_TIMEOUT to attempt flushing the socket when disconnecting" do
653
+ it "should not wait much longer than EXIT_FLUSH_TIMEOUT to attempt flushing the socket when disconnecting" do
586
654
  agent.increment('foo', 1)
587
655
  wait do
588
656
  expect(server.commands.grep(/foo/).size).to eq(1)
@@ -598,12 +666,13 @@ shared_examples "Instrumental Agent" do
598
666
  raise
599
667
  end
600
668
  end.join
601
- end
669
+ end.at_least(1).times
670
+
602
671
  with_constants('Instrumental::Agent::EXIT_FLUSH_TIMEOUT' => 3) do
603
672
  tm = Time.now.to_f
604
673
  agent.cleanup
605
674
  diff = Time.now.to_f - tm
606
- expect(diff).to be <= 3
675
+ expect(diff).to be <= 3.1
607
676
  end
608
677
  end
609
678
 
@@ -658,7 +727,7 @@ shared_examples "Instrumental Agent" do
658
727
  expect(agent.send(:running?)).to eq(true)
659
728
 
660
729
  # Setup a failure for the next command so we'll break out of the inner
661
- # loop in run_worker_loop causing another call to open_socket
730
+ # loop in run_sender_loop causing another call to open_socket
662
731
  test_connection_fail = true
663
732
  tc = agent.method(:test_connection)
664
733
  allow(agent).to receive(:test_connection) { |*args, &block| test_connection_fail ? raise("fail") : tc.call(*args) }
@@ -735,6 +804,330 @@ shared_examples "Instrumental Agent" do
735
804
  end
736
805
  end
737
806
  end
807
+
808
+ describe Instrumental::Agent, "aggregation" do
809
+ context "aggregation enabled" do
810
+ let(:frequency) { 2 }
811
+
812
+ it "can be enabled at Agent.new time" do
813
+ expect(agent.frequency).to eq(2)
814
+ end
815
+
816
+ it "can be modified by setting the agent frequency" do
817
+ agent.frequency = 15
818
+ expect(agent.frequency).to eq(15)
819
+ end
820
+
821
+ it "is disabled by default" do
822
+ agent = Instrumental::Agent.new('test_token')
823
+ expect(agent.frequency.to_f).to eq(0)
824
+ end
825
+
826
+ it "should only allow frequencies that align with minutes" do
827
+ (-5..100).each do |freq|
828
+ agent.frequency = freq
829
+ expect(Instrumental::Agent::VALID_FREQUENCIES).to include(agent.frequency)
830
+ end
831
+ end
832
+
833
+ it "bypasses aggregator queue entirely for most commands when frequency == 0" do
834
+ agent.frequency = 0 # this is red - 0 for green
835
+ expect(EventAggregator).not_to receive(:new)
836
+ agent.increment('a_metric')
837
+ end
838
+
839
+ it "adds data to the event aggregator and does not immediately send it" do
840
+ Timecop.travel start_of_minute
841
+ agent.increment('test')
842
+ wait do
843
+ expect(agent.instance_variable_get(:@event_aggregator).size).to eq(1)
844
+ expect(agent.instance_variable_get(:@event_aggregator).values.values.first.metric).to eq('test')
845
+ end
846
+ end
847
+
848
+ it "batches data before sending" do
849
+ Timecop.freeze do
850
+ agent.increment('a_metric')
851
+ agent.increment('a_metric')
852
+ agent.increment('another_metric')
853
+ end
854
+ agent.flush(true)
855
+ wait do
856
+ expect(server.commands.grep(/_metric/).size).to eq(2)
857
+ aggregated_metric = server.commands.grep(/a_metric/).first.split(" ")
858
+ expect(aggregated_metric[2].to_i).to eq(2) # value
859
+ expect(aggregated_metric[4].to_i).to eq(2) # count
860
+ end
861
+ end
862
+
863
+ it "aggregates to the specified frequency within the aggregator" do
864
+ Timecop.travel(start_of_minute)
865
+ agent.frequency = 15
866
+ expect(agent.frequency).not_to be(Instrumental::Agent::DEFAULT_FREQUENCY)
867
+ agent.increment('metric', 1, Time.at(0))
868
+
869
+ # will get aligned to the closest frequency (15)
870
+ agent.increment('metric', 1, Time.at(20))
871
+ wait do
872
+ expect(agent.instance_variable_get(:@event_aggregator).values.keys).to eq(["metric:0", "metric:15"])
873
+ end
874
+ agent.flush
875
+ wait do
876
+ expect(server.commands.grep(/metric 1 0/).size).to eq(1)
877
+ expect(server.commands.grep(/metric 1 15/).size).to eq(1)
878
+ end
879
+ end
880
+
881
+ it "flushes data from both queues before sending" do
882
+ Timecop.freeze do
883
+ 100.times do |i|
884
+ agent.increment("test_metric_#{i}")
885
+ agent.increment("other_metric")
886
+ end
887
+ end
888
+
889
+ expect(agent.instance_variable_get(:@aggregator_queue).size).to be > 0
890
+ agent.flush
891
+ expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
892
+ expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
893
+
894
+ wait do
895
+ expect(server.commands.grep(/test_metric/).size).to eq(100)
896
+ expect(server.commands.grep(/other_metric/).size).to eq(1)
897
+ end
898
+ end
899
+
900
+ it "does not batch notices" do
901
+ agent.frequency = 60
902
+ agent.notice "things are happening", 0, 100
903
+ agent.notice "things are happening", 0, 100
904
+ agent.notice "things are happening", 0, 100
905
+ wait do
906
+ expect(server.commands.grep(/things are happening/).size).to eq(3)
907
+ end
908
+ end
909
+
910
+ it "can be disabled by setting frequency to nil" do
911
+ agent.frequency = nil
912
+ expect(EventAggregator).not_to receive(:new)
913
+ agent.increment('metric')
914
+ wait do
915
+ expect(server.commands.grep(/metric/).size).to eq(1)
916
+ end
917
+ end
918
+
919
+ it "can be disabled by setting frequency to 0" do
920
+ agent.frequency = 0
921
+ expect(EventAggregator).not_to receive(:new)
922
+ agent.increment('metric')
923
+ wait do
924
+ expect(server.commands.grep(/metric/).size).to eq(1)
925
+ end
926
+ end
927
+
928
+ it "automatically uses the highest-without-going-over frequency for a bad frequency" do
929
+ agent.frequency = 17
930
+ expect(agent.frequency).to eq(15)
931
+ agent.frequency = 69420
932
+ expect(agent.frequency).to eq(60)
933
+ agent.frequency = 0
934
+ expect(agent.frequency).to eq(0)
935
+ agent.frequency = -1
936
+ expect(agent.frequency).to eq(0)
937
+ end
938
+
939
+ it "can take strings as frequency" do
940
+ agent = Instrumental::Agent.new('test_token', :frequency => "15")
941
+ expect(agent.frequency).to eq(15)
942
+ end
943
+
944
+ it "should not be enabled at the same time as synchronous" do
945
+ expect(Instrumental::Agent.logger).to receive(:warn).with(/Synchronous and Frequency should not be enabled at the same time! Defaulting to synchronous mode./)
946
+ agent = Instrumental::Agent.new('test_token', :synchronous => true, :frequency => 6)
947
+ expect(agent.synchronous).to eq(true)
948
+ expect(agent.frequency).to eq(0)
949
+ end
950
+
951
+ it "should use synchronous mode if it is enabled, even if turned on after frequency set at start" do
952
+ agent.increment('metric')
953
+ agent.increment('metric')
954
+ agent.synchronous = true
955
+ agent.increment('metric')
956
+ wait do
957
+ expect(server.commands.grep(/metric 1/).size).to eq(1)
958
+ end
959
+ agent.flush
960
+ wait do
961
+ expect(server.commands.grep(/metric 1/).size).to eq(1)
962
+ expect(server.commands.grep(/metric 2/).size).to eq(1)
963
+ end
964
+ end
965
+
966
+ it "sends aggregated metrics after specified frequency, even if no flush is sent" do
967
+ agent.frequency = 1
968
+ Timecop.travel(start_of_minute)
969
+ agent.increment('metric')
970
+ agent.increment('metric')
971
+ agent.gauge('other', 1)
972
+ agent.gauge('other', 1)
973
+ agent.gauge('other', 1)
974
+ sleep (0.5)
975
+ wait { expect(server.commands.grep(/metric/).size).to eq(0) }
976
+ sleep (0.51) # total sleep > 1 frequency
977
+
978
+ expect(server.commands.grep(/metric 2/).size).to eq(1)
979
+ expect(server.commands.grep(/other 3/).size).to eq(1)
980
+ end
981
+
982
+ # this test really relies on the worker threads not working unexpectedly
983
+ it "will overflow if the aggregator queue is full" do
984
+ Timecop.travel(start_of_minute)
985
+ with_constants('Instrumental::Agent::MAX_BUFFER' => 3) do
986
+ allow(agent.logger).to receive(:debug)
987
+ expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_test 4 300 1")
988
+ agent.increment('overflow_test', 4, 300, 1)
989
+ agent.increment('overflow_test', 4, 300, 1)
990
+ agent.increment('overflow_test', 4, 300, 1)
991
+ agent.increment('overflow_test', 4, 300, 1)
992
+
993
+ expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(3)
994
+ agent.flush
995
+ expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
996
+ end
997
+ end
998
+
999
+ it "if aggregator is at max size, next command will force a forward to the sender thread" do
1000
+ Timecop.travel(start_of_minute)
1001
+ with_constants('Instrumental::Agent::MAX_AGGREGATOR_SIZE' => 3) do
1002
+ agent.increment('overflow_test1')
1003
+ agent.increment('overflow_test2')
1004
+ agent.increment('overflow_test3')
1005
+ agent.increment('overflow_test4')
1006
+ agent.increment('overflow_test5')
1007
+
1008
+ # only 1 because the 5th command triggers a forward of the first 4
1009
+ wait do
1010
+ expect(agent.instance_variable_get(:@event_aggregator).size).to eq(1)
1011
+ end
1012
+ agent.flush
1013
+ wait do
1014
+ expect(server.commands.grep(/overflow_test/).size).to eq(5)
1015
+ end
1016
+ end
1017
+ end
1018
+
1019
+ context do
1020
+ let(:listen) { false }
1021
+ it "will not send aggregators to the sender queue if the sender thread is not ready" do
1022
+ Timecop.travel(start_of_minute)
1023
+ agent.frequency = 1
1024
+
1025
+ with_constants('Instrumental::Agent::MAX_BUFFER' => 3,
1026
+ 'Instrumental::Agent::MAX_AGGREGATOR_SIZE' => 4) do
1027
+
1028
+ # fill the queue
1029
+ agent.increment('overflow_test1')
1030
+ agent.increment('overflow_test2')
1031
+ agent.increment('overflow_test3')
1032
+
1033
+ # wait until they are all in the aggregator
1034
+ wait do
1035
+ expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
1036
+ expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
1037
+ expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
1038
+ end
1039
+
1040
+ # fill the queue again
1041
+ agent.increment('overflow_test1')
1042
+ agent.increment('overflow_test2')
1043
+ agent.increment('overflow_test3')
1044
+
1045
+ # wait until they are all in the aggregator
1046
+ wait do
1047
+ expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
1048
+ expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
1049
+ expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
1050
+ end
1051
+
1052
+ # wait for the aggregator to get forwarded and popped by the sender
1053
+ wait do
1054
+ expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
1055
+ expect(agent.instance_variable_get(:@event_aggregator)).to eq(nil)
1056
+ expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
1057
+ end
1058
+
1059
+ # fill the queue again
1060
+ agent.increment('overflow_test4')
1061
+ agent.increment('overflow_test5')
1062
+ agent.increment('overflow_test6')
1063
+
1064
+ # wait for them all to be in the aggregator
1065
+ wait do
1066
+ expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
1067
+ expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
1068
+ expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
1069
+ end
1070
+
1071
+ # sleep until the next forward is done
1072
+ sleep(agent.frequency + 0.1)
1073
+
1074
+ # fill the queue again
1075
+ agent.increment('overflow_test7')
1076
+ agent.increment('overflow_test8')
1077
+ agent.increment('overflow_test9')
1078
+
1079
+ # because sending is blocked, the previous aggregator never sent
1080
+ # when it hits max size, the aggregator queue starts backing up
1081
+ wait do
1082
+ expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(1)
1083
+ expect(agent.instance_variable_get(:@event_aggregator).size).to eq(5)
1084
+ expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
1085
+ end
1086
+
1087
+ # send 3 more items, to overflow the aggregator queue
1088
+ allow(agent.logger).to receive(:debug)
1089
+ expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_testc 4 300 1")
1090
+ agent.increment('overflow_testa')
1091
+ agent.increment('overflow_testb')
1092
+ agent.increment('overflow_testc', 4, 300, 1) # will get dropped
1093
+
1094
+ wait do
1095
+ expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(3)
1096
+ expect(agent.instance_variable_get(:@event_aggregator).size).to eq(5)
1097
+ expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
1098
+ end
1099
+ end
1100
+ end
1101
+ end
1102
+
1103
+ if FORK_SUPPORTED
1104
+ it "should automatically reconnect when forked when aggregation is enabled" do
1105
+ Timecop.travel start_of_minute
1106
+ agent.frequency = 10
1107
+
1108
+ agent.increment('fork_reconnect_test1', 1, 0, 1)
1109
+ fork do
1110
+ agent.increment('fork_reconnect_test2', 1, 0, 1) # triggers reconnect
1111
+ exit
1112
+ end
1113
+
1114
+
1115
+ sleep 1
1116
+ agent.increment('fork_reconnect_test3', 1, 0, 1) # triggers reconnect
1117
+
1118
+ agent.flush
1119
+ expect(server.connect_count).to eq(2)
1120
+
1121
+ wait do
1122
+ expect(server.commands).to include("increment fork_reconnect_test1 1 0 1")
1123
+ expect(server.commands).to include("increment fork_reconnect_test2 1 0 1")
1124
+ expect(server.commands).to include("increment fork_reconnect_test3 1 0 1")
1125
+ expect(server.commands.grep(/fork_reconnect/).size).to eq(3)
1126
+ end
1127
+ end
1128
+ end
1129
+ end
1130
+ end
738
1131
  end
739
1132
  end
740
1133