RubyGems - instrumental_agent - Versions diffs - 1.0.1 → 3.0.0.beta - Mend

instrumental_agent 1.0.1 → 3.0.0.beta

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (25)

checksums.yaml +5 -5
data/.ruby-version +1 -0
data/.travis.yml +4 -6
data/CHANGELOG.md +21 -0
data/Gemfile +3 -5
data/README.md +40 -9
data/instrumental_agent.gemspec +6 -2
data/lib/instrumental/agent.rb +262 -155
data/lib/instrumental/capistrano.rb +4 -46
data/lib/instrumental/capistrano/capistrano2.rb +47 -0
data/lib/instrumental/capistrano/capistrano3.rake +56 -0
data/lib/instrumental/command_structs.rb +32 -0
data/lib/instrumental/event_aggregator.rb +26 -0
data/lib/instrumental/version.rb +1 -1
data/script/setup +22 -4
data/script/test +34 -2
data/spec/agent_spec.rb +579 -109
data/spec/command_struct_specs.rb +20 -0
data/spec/event_aggregator_spec.rb +53 -0
data/spec/spec_helper.rb +8 -1
data/spec/test_server.rb +13 -6
metadata +47 -17
data/certs/equifax.ca.pem +0 -69
data/certs/geotrust.ca.pem +0 -80
data/certs/rapidssl.ca.pem +0 -94

data/lib/instrumental/capistrano.rb CHANGED

@@ -1,47 +1,5 @@
-require 'capistrano'
-require 'instrumental_agent'
-require 'etc'
-if Capistrano::Configuration.instance
-  Capistrano::Configuration.instance.load do
-    namespace :instrumental do
-      namespace :util do
-        desc "marker for beginning of deploy"
-        task :deploy_start do
-          set :instrumental_deploy_start, Time.now
-        end
-        desc "marker for end of deploy"
-        task :deploy_end do
-          set :instrumental_deploy_end, Time.now
-        end
-      end
-      desc "send a notice to instrumental about the deploy"
-      task :record_deploy_notice do
-        start_at = exists?(:instrumental_deploy_start) ? instrumental_deploy_start : Time.now
-        end_at = exists?(:instrumental_deploy_end) ? instrumental_deploy_end : start_at
-        deploy_duration_in_seconds = end_at - start_at
-        deployer = Etc.getlogin.chomp
-        agent_options = { :synchronous => true }
-        agent_options[:collector] = instrumental_host if exists?(:instrumental_host)
-        agent = Instrumental::Agent.new(instrumental_key, agent_options)
-        message = if exists?(:deploy_message)
-          deploy_message
-        else
-          "#{deployer} deployed #{current_revision}"
-        end
-        agent.notice(message,
-                     start_at,
-                     deploy_duration_in_seconds)
-        logger.info("Notified Instrumental of deployment")
-      end
-    end
-    before "deploy", "instrumental:util:deploy_start"
-    after  "deploy", "instrumental:util:deploy_end"
-    before "deploy:migrations", "instrumental:util:deploy_start"
-    after  "deploy:migrations", "instrumental:util:deploy_end"
-    after  "instrumental:util:deploy_end", "instrumental:record_deploy_notice"
-  end
+if Gem::Specification.find_by_name("capistrano").version >= Gem::Version.new("3.0.0")
+  load File.expand_path("../capistrano/capistrano3.rake", __FILE__)
+else
+  require_relative "capistrano/capistrano2"
 end

data/lib/instrumental/capistrano/capistrano2.rb ADDED

@@ -0,0 +1,47 @@
+require "etc"
+require "instrumental_agent"
+Capistrano::Configuration.instance.load do
+  _cset(:instrumental_hooks) { true }
+  _cset(:instrumental_key) { nil }
+  _cset(:deployer) { Etc.getlogin.chomp }
+  if fetch(:instrumental_hooks)
+    before "deploy", "instrumental:util:deploy_start"
+    after  "deploy", "instrumental:util:deploy_end"
+    before "deploy:migrations", "instrumental:util:deploy_start"
+    after  "deploy:migrations", "instrumental:util:deploy_end"
+    after  "instrumental:util:deploy_end", "instrumental:record_deploy_notice"
+  end
+  namespace :instrumental do
+    namespace :util do
+      desc "marker for beginning of deploy"
+      task :deploy_start do
+        set :instrumental_deploy_start, Time.now
+      end
+      desc "marker for end of deploy"
+      task :deploy_end do
+        set :instrumental_deploy_end, Time.now
+      end
+    end
+    desc "send a notice to instrumental about the deploy"
+    task :record_deploy_notice do
+      start_at                   = fetch(:instrumental_deploy_start, Time.now)
+      end_at                     = fetch(:instrumental_deploy_end, start_at)
+      deploy_duration_in_seconds = end_at - start_at
+      deployer                   = fetch(:deployer)
+      agent_options              = { :synchronous => true }
+      agent_options[:collector]  = instrumental_host if fetch(:instrumental_host, false)
+      agent                      = Instrumental::Agent.new(fetch(:instrumental_key), agent_options)
+      message                    = fetch(:deploy_message, "#{deployer} deployed #{current_revision}")
+      agent.notice(message,
+                   start_at,
+                   deploy_duration_in_seconds)
+      logger.info("Notified Instrumental of deployment")
+    end
+  end
+end

data/lib/instrumental/capistrano/capistrano3.rake ADDED

@@ -0,0 +1,56 @@
+require "etc"
+require "instrumental_agent"
+namespace :load do
+  task :defaults do
+    set :instrumental_hooks, true
+    set :instrumental_key,   nil
+    set :deployer,           Etc.getlogin.chomp
+  end
+end
+namespace :deploy do
+  before :starting, :check_instrumental_hooks do
+    invoke "instrumental:util:add_hooks" if fetch(:instrumental_hooks)
+  end
+end
+namespace :instrumental do
+  namespace :util do
+    desc "add instrumental hooks to deploy"
+    task :add_hooks do
+      before "deploy", "instrumental:util:deploy_start"
+      after  "deploy", "instrumental:util:deploy_end"
+      after  "instrumental:util:deploy_end", "instrumental:record_deploy_notice"
+    end
+    desc "marker for beginning of deploy"
+    task :deploy_start do
+      set :instrumental_deploy_start, Time.now
+    end
+    desc "marker for end of deploy"
+    task :deploy_end do
+      set :instrumental_deploy_end, Time.now
+    end
+  end
+  desc "send a notice to instrumental about the deploy"
+  task :record_deploy_notice do
+    start_at                   = fetch(:instrumental_deploy_start, Time.now)
+    end_at                     = fetch(:instrumental_deploy_end, start_at)
+    deploy_duration_in_seconds = end_at - start_at
+    deployer                   = fetch(:deployer)
+    agent_options              = { :synchronous => true }
+    agent_options[:collector]  = instrumental_host if fetch(:instrumental_host, false)
+    message                    = fetch(:deploy_message, "#{deployer} deployed #{fetch(:current_revision)}".strip)
+    if fetch(:instrumental_key)
+      agent = Instrumental::Agent.new(fetch(:instrumental_key), agent_options)
+      agent.notice(message,
+                   start_at,
+                   deploy_duration_in_seconds)
+      puts "Notified Instrumental of deployment"
+    end
+  end
+end

data/lib/instrumental/command_structs.rb ADDED

@@ -0,0 +1,32 @@
+module Instrumental
+  METRIC_TYPES = ["increment".freeze, "gauge".freeze].freeze
+  Command = Struct.new(:command, :metric, :value, :time, :count) do
+    def initialize(command, metric, value, time, count)
+      super(command, metric, value, time.to_i, count.to_i)
+    end
+    def to_s
+      [command, metric, value, time, count].map(&:to_s).join(" ")
+    end
+    def metadata
+      "#{metric}:#{time}".freeze
+    end
+    def +(other_command)
+      return self if other_command.nil?
+      Command.new(command, metric, value + other_command.value, time, count + other_command.count)
+    end
+  end
+  Notice = Struct.new(:note, :time, :duration) do
+    def initialize(note, time, duration)
+      super(note, time.to_i, duration.to_i)
+    end
+    def to_s
+      ["notice".freeze, time, duration, note].map(&:to_s).join(" ")
+    end
+  end
+end

data/lib/instrumental/event_aggregator.rb ADDED

@@ -0,0 +1,26 @@
+class EventAggregator
+  attr_accessor :counts, :values, :received_at, :frequency
+  def initialize(frequency:)
+    @values = Hash.new
+    @frequency = frequency
+  end
+  def put(command)
+    command_at = command.time
+    unless(command_at % frequency == 0)
+      command.time = (command_at - (command_at % frequency))
+    end
+    metadata = command.metadata
+    @values[metadata] = (command + @values[metadata])
+  end
+  def size
+    @values.size
+  end
+  def coerce_time(time)
+    itime = time.to_i
+    (itime - (itime % frequency)).to_i
+  end
+end

data/lib/instrumental/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Instrumental
-  VERSION = "1.0.1"
+  VERSION = "3.0.0.beta"
 end

data/script/setup CHANGED

@@ -2,7 +2,25 @@
 set -e
 cd "$(dirname "$0")/.."
-type rbenv || $(brew install rbenv; brew install ruby-build)
-rbenv which ruby || $(brew upgrade ruby-build || true; rbenv install)
-gem list -i bundler || gem install bundler
-bundle install
+rbenv which ruby >/dev/null 2>&1 || (brew upgrade ruby-build || true; rbenv install)
+# Setup rbenv so we can switch rubies below
+eval "$(rbenv init - --no-rehash)"
+for ruby_version in `ruby -ryaml -e 'puts YAML.load(File.read(".travis.yml"))["rvm"].join(" ")'`; do
+  rbenv versions --bare | grep "^${ruby_version}$" || rbenv install $ruby_version
+  rbenv shell $ruby_version
+  gem list -i bundler >/dev/null || gem install bundler
+  bundle install
+done
+tput bold    # bold text
+tput setaf 2 # green text
+echo "****************************************************************"
+echo "*                                                              *"
+echo "*                         Good to go!                          *"
+echo "*                                                              *"
+echo "****************************************************************"
+tput sgr0    # reset to default text

data/script/test CHANGED

@@ -2,5 +2,37 @@
 set -e
 cd "$(dirname "$0")/.."
-script/setup
-bundle exec rspec
+eval "$(rbenv init - --no-rehash)"
+rspec_file_line="$1"
+if [[ "$rspec_file_line" != "" ]]; then
+  rspec_file_line="[${rspec_file_line}]"
+fi
+success="true"
+for ruby_version in `ruby -ryaml -e 'puts YAML.load(File.read(".travis.yml"))["rvm"].join(" ")'`; do
+  {
+    echo "testing ruby version $ruby_version" &&
+      rbenv shell $ruby_version &&
+      bundle exec rspec
+  } || success="false"
+done
+if [ $success == "true" ]; then
+  tput bold    # bold text
+  tput setaf 2 # green text
+  echo "======================================"
+  echo "=              Passed                ="
+  echo "======================================"
+  tput sgr0    # reset to default text
+  exit 0
+else
+  tput bold    # bold text
+  tput setaf 1 # red text
+  echo "======================================"
+  echo "=              FAILED                ="
+  echo "======================================"
+  tput sgr0    # reset to default text
+  exit 1
+fi

data/spec/agent_spec.rb CHANGED

@@ -1,7 +1,21 @@
 require 'spec_helper'
-def wait(n=0.2)
-  sleep n # FIXME: hack
+def wait(n=0.2, &block)
+  start = Time.now
+  if block_given?
+    begin
+      yield
+    rescue Exception => ex
+      if (Time.now - start) < 5
+        sleep n
+        retry
+      else
+        raise ex
+      end
+    end
+  else
+    sleep n
+  end
 end
 FORK_SUPPORTED = begin
@@ -24,7 +38,9 @@ shared_examples "Instrumental Agent" do
     let(:synchronous)  { false }
     let(:token)        { 'test_token' }
     let(:address)      { server.host_and_port }
-    let(:agent)        { Instrumental::Agent.new(token, :collector => address, :synchronous => synchronous, :enabled => enabled, :secure => secure?, :verify_cert => verify_cert?) }
+    let(:metrician)    { false }
+    let(:frequency)    { 0 }
+    let(:agent)        { Instrumental::Agent.new(token, :collector => address, :synchronous => synchronous, :enabled => enabled, :secure => secure?, :verify_cert => verify_cert?, :metrician => metrician, :frequency => frequency) }
     # Server options
     let(:listen)       { true }
@@ -32,6 +48,12 @@ shared_examples "Instrumental Agent" do
     let(:authenticate) { true }
     let(:server)       { TestServer.new(:listen => listen, :authenticate => authenticate, :response => response, :secure => secure?) }
+    # Time Travel Options
+    let(:start_of_minute) do
+      now = Time.now.to_i
+      Time.at(now - (now % 60))
+    end
     before do
       Instrumental::Agent.logger.level = Logger::UNKNOWN
       @server = server
@@ -52,31 +74,36 @@ shared_examples "Instrumental Agent" do
       it "should not connect to the server after receiving a metric" do
         agent.gauge('disabled_test', 1)
-        wait
-        expect(server.connect_count).to eq(0)
+        wait do
+          expect(server.connect_count).to eq(0)
+        end
       end
       it "should no op on flush without reconnect" do
         1.upto(100) { agent.gauge('disabled_test', 1) }
         agent.flush(false)
-        wait
-        expect(server.commands).to be_empty
+        wait do
+          expect(server.commands).to be_empty
+        end
       end
       it "should no op on flush with reconnect" do
         1.upto(100) { agent.gauge('disabled_test', 1) }
         agent.flush(true)
-        wait
-        expect(server.commands).to be_empty
+        wait do
+          expect(server.commands).to be_empty
+        end
       end
       it "should no op on an empty flush" do
         agent.flush(true)
-        wait
-        expect(server.commands).to be_empty
+        wait do
+          expect(server.commands).to be_empty
+        end
       end
       it "should send metrics to logger" do
+        Timecop.freeze
         now = Time.now
         expect(agent.logger).to receive(:debug).with("gauge metric 1 #{now.to_i} 1")
         agent.gauge("metric", 1)
@@ -91,14 +118,16 @@ shared_examples "Instrumental Agent" do
       it "should connect to the server after sending a metric" do
         agent.increment("test.foo")
-        wait
-        expect(server.connect_count).to eq(1)
+        wait do
+          expect(server.connect_count).to eq(1)
+        end
       end
       it "should announce itself, and include version" do
         agent.increment("test.foo")
-        wait
-        expect(server.commands[0]).to match(/hello .*/)
+        wait do
+          expect(server.commands[0]).to match(/hello .*/)
+        end
         expect(server.commands[0]).to match(/ version /)
         expect(server.commands[0]).to match(/ hostname /)
         expect(server.commands[0]).to match(/ pid /)
@@ -108,15 +137,18 @@ shared_examples "Instrumental Agent" do
       it "should authenticate using the token" do
         agent.increment("test.foo")
-        wait
-        expect(server.commands[1]).to eq("authenticate test_token")
+        wait do
+          expect(server.commands[1]).to eq("authenticate test_token")
+        end
       end
       it "should report a gauge" do
+        Timecop.freeze
         now = Time.now
         agent.gauge('gauge_test', 123)
-        wait
-        expect(server.commands.last).to eq("gauge gauge_test 123 #{now.to_i} 1")
+        wait do
+          expect(server.commands.last).to eq("gauge gauge_test 123 #{now.to_i} 1")
+        end
       end
       it "should report a time as gauge and return the block result" do
@@ -125,8 +157,9 @@ shared_examples "Instrumental Agent" do
           1 + 1
         end
         expect(return_value).to eq(2)
-        wait
-        expect(server.commands.last).to match(/gauge time_value_test .* #{now.to_i}/)
+        wait do
+          expect(server.commands.last).to match(/gauge time_value_test .* #{now.to_i}/)
+        end
       end
       it "should report a time_ms as gauge and return the block result" do
@@ -136,8 +169,9 @@ shared_examples "Instrumental Agent" do
           1 + 1
         end
         expect(return_value).to eq(2)
-        wait
-        expect(server.commands.last).to match(/gauge time_value_test 1000/)
+        wait do
+          expect(server.commands.last).to match(/gauge time_value_test 1000/)
+        end
       end
       it "should return the value gauged" do
@@ -147,21 +181,24 @@ shared_examples "Instrumental Agent" do
       it "should report a gauge with a set time" do
         agent.gauge('gauge_test', 123, 555)
-        wait
-        expect(server.commands.last).to eq("gauge gauge_test 123 555 1")
+        wait do
+          expect(server.commands.last).to eq("gauge gauge_test 123 555 1")
+        end
       end
       it "should report a gauge with a set time and count" do
         agent.gauge('gauge_test', 123, 555, 111)
-        wait
-        expect(server.commands.last).to eq("gauge gauge_test 123 555 111")
+        wait do
+          expect(server.commands.last).to eq("gauge gauge_test 123 555 111")
+        end
       end
       it "should report an increment" do
         now = Time.now
         agent.increment("increment_test")
-        wait
-        expect(server.commands.last).to eq("increment increment_test 1 #{now.to_i} 1")
+        wait do
+          expect(server.commands.last).to eq("increment increment_test 1 #{now.to_i} 1")
+        end
       end
       it "should return the value incremented by" do
@@ -172,33 +209,43 @@ shared_examples "Instrumental Agent" do
       it "should report an increment a value" do
         now = Time.now
         agent.increment("increment_test", 2)
-        wait
-        expect(server.commands.last).to eq("increment increment_test 2 #{now.to_i} 1")
+        wait do
+          expect(server.commands.last).to eq("increment increment_test 2 #{now.to_i} 1")
+        end
       end
       it "should report an increment with a set time" do
         agent.increment('increment_test', 1, 555)
-        wait
-        expect(server.commands.last).to eq("increment increment_test 1 555 1")
+        wait do
+          expect(server.commands.last).to eq("increment increment_test 1 555 1")
+        end
       end
       it "should report an increment with a set time and count" do
         agent.increment('increment_test', 1, 555, 111)
-        wait
-        expect(server.commands.last).to eq("increment increment_test 1 555 111")
+        wait do
+          expect(server.commands.last).to eq("increment increment_test 1 555 111")
+        end
       end
-      it "should discard data that overflows the buffer" do
-        with_constants('Instrumental::Agent::MAX_BUFFER' => 3) do
-          5.times do |i|
-            agent.increment('overflow_test', i + 1, 300)
+      context do
+        let(:listen) { false }
+        it "should discard data that overflows the buffer" do
+          with_constants('Instrumental::Agent::MAX_BUFFER' => 3) do
+            allow(agent.logger).to receive(:debug)
+            expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_test 4 300 1")
+            expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_test 5 300 1")
+            1.upto(5) do |i|
+              agent.increment('overflow_test', i, 300)
+            end
+            wait
+            expect(agent.sender_queue.size).to eq(3)
+            expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 1 300 1")
+            expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 2 300 1")
+            expect(agent.sender_queue.pop.first.to_s).to start_with("increment overflow_test 3 300 1")
+            expect(agent.sender_queue.size).to eq(0)
           end
-          wait
-          expect(server.commands).to include("increment overflow_test 1 300 1")
-          expect(server.commands).to include("increment overflow_test 2 300 1")
-          expect(server.commands).to include("increment overflow_test 3 300 1")
-          expect(server.commands).to_not include("increment overflow_test 4 300 1")
-          expect(server.commands).to_not include("increment overflow_test 5 300 1")
         end
       end
@@ -208,7 +255,7 @@ shared_examples "Instrumental Agent" do
           5.times do |i|
             agent.increment('overflow_test', i + 1, 300)
           end
-          expect(agent.instance_variable_get(:@queue).size).to eq(0)
+          expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
           wait # let the server receive the commands
           expect(server.commands).to include("increment overflow_test 1 300 1")
           expect(server.commands).to include("increment overflow_test 2 300 1")
@@ -224,8 +271,10 @@ shared_examples "Instrumental Agent" do
           fork do
             agent.increment('fork_reconnect_test', 1, 3) # triggers reconnect
           end
           wait(1)
           agent.increment('fork_reconnect_test', 1, 4) # triggers reconnect
           wait(1)
           expect(server.connect_count).to eq(2)
@@ -241,24 +290,25 @@ shared_examples "Instrumental Agent" do
           sleep 1
         }
-        run_worker_loop_calls = 0
-        allow(agent).to receive(:run_worker_loop) {
-          run_worker_loop_calls += 1
+        run_sender_loop_calls = 0
+        allow(agent).to receive(:run_sender_loop) {
+          run_sender_loop_calls += 1
           sleep 3 # keep the worker thread alive
         }
         t = Thread.new { agent.increment("race") }
         agent.increment("race")
         wait(2)
-        expect(run_worker_loop_calls).to eq(1)
-        expect(agent.queue.size).to eq(2)
+        expect(run_sender_loop_calls).to eq(1)
+        expect(agent.sender_queue.size).to eq(2)
       end
       it "should never let an exception reach the user" do
         expect(agent).to receive(:send_command).twice { raise(Exception.new("Test Exception")) }
         expect(agent.increment('throws_exception', 2)).to eq(nil)
-        wait
-        expect(agent.increment('throws_exception', 234)).to eq(nil)
+        wait do
+          expect(agent.increment('throws_exception', 234)).to eq(nil)
+        end
       end
       it "should let exceptions in time bubble up" do
@@ -273,27 +323,22 @@ shared_examples "Instrumental Agent" do
         expect(agent.increment("test")).to eq(nil)
       end
-      it "should track invalid metrics" do
-        expect(agent.logger).to receive(:warn).with(/%%/)
-        agent.increment(' %% .!#@$%^&*', 1, 1)
-        wait
-        expect(server.commands.join("\n")).to include("increment agent.invalid_metric")
-      end
       it "should allow reasonable metric names" do
         agent.increment('a')
         agent.increment('a.b')
         agent.increment('hello.world')
         agent.increment('ThisIsATest.Of.The.Emergency.Broadcast.System.12345')
-        wait
-        expect(server.commands.join("\n")).to_not include("increment agent.invalid_metric")
+        wait do
+          expect(server.commands.join("\n")).to_not include("increment agent.invalid_metric")
+        end
       end
       it "should track invalid values" do
         expect(agent.logger).to receive(:warn).with(/hello.*testington/)
         agent.increment('testington', 'hello')
-        wait
-        expect(server.commands.join("\n")).to include("increment agent.invalid_value")
+        wait do
+          expect(server.commands.join("\n")).to include("increment agent.invalid_value")
+        end
       end
       it "should allow reasonable values" do
@@ -305,61 +350,86 @@ shared_examples "Instrumental Agent" do
         agent.increment('a',  2.2)
         agent.increment('a',  333.333)
         agent.increment('a',  Float::EPSILON)
-        wait
-        expect(server.commands.join("\n")).to_not include("increment agent.invalid_value")
+        wait do
+          expect(server.commands.join("\n")).to_not include("increment agent.invalid_value")
+        end
       end
       it "should send notices to the server" do
+        Timecop.freeze
         tm = Time.now
         agent.notice("Test note", tm)
-        wait
-        expect(server.commands.join("\n")).to include("notice #{tm.to_i} 0 Test note")
+        wait do
+          expect(server.commands.join("\n")).to include("notice #{tm.to_i} 0 Test note")
+        end
       end
       it "should prevent a note w/ newline characters from being sent to the server" do
-        expect(agent.notice("Test note\n")).to eq(nil)
-        wait
-        expect(server.commands.join("\n")).to_not include("notice Test note")
+        expect(agent.notice("Test_bad_note\n")).to eq(nil)
+        # Send a note that make it through so we're sure the bad note would have
+        # arrived if it was going to.
+        Timecop.freeze
+        tm = Time.now
+        agent.notice("Test_good_note", tm)
+        wait do
+          expect(server.commands.join("\n")).to include("Test_good_note")
+        end
+        expect(server.commands.join("\n")).to_not include("Test_bad_note")
       end
       it "should allow outgoing metrics to be stopped" do
         tm = Time.now
         agent.increment("foo.bar", 1, tm)
         agent.stop
+        # In Java the test server hangs sometimes when the agent disconnects so
+        # this cleans up the server.
+        server.stop
         wait
-        agent.increment("foo.baz", 1, tm)
+        server.listen
         wait
-        expect(server.commands.join("\n")).to include("increment foo.baz 1 #{tm.to_i}")
+        agent.increment("foo.baz", 1, tm)
+        wait do
+          expect(server.commands.join("\n")).to include("increment foo.baz 1 #{tm.to_i}")
+        end
         expect(server.commands.join("\n")).to_not include("increment foo.bar 1 #{tm.to_i}")
       end
       it "should allow flushing pending values to the server" do
         1.upto(100) { agent.gauge('a', rand(50)) }
-        expect(agent.instance_variable_get(:@queue).size).to be > 0
+        expect(agent.instance_variable_get(:@sender_queue).size).to be > 0
         agent.flush
-        expect(agent.instance_variable_get(:@queue).size).to eq(0)
-        wait
-        expect(server.commands.grep(/^gauge a /).size).to eq(100)
+        expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
+        wait do
+          expect(server.commands.grep(/^gauge a /).size).to eq(100)
+        end
       end
       it "should no op on an empty flush" do
         agent.flush(true)
-        wait
-        expect(server.commands).to be_empty
+        wait do
+          expect(server.commands).to be_empty
+        end
       end
     end
     describe Instrumental::Agent, "connection problems" do
       it "should automatically reconnect on disconnect" do
-        agent.increment("reconnect_test", 1, 1234)
-        wait
+        agent.increment("reconnect_test1", 1, 1234)
+        wait do
+          expect(server.commands.grep(/reconnect_test1/).size).to eq(1)
+        end
         server.disconnect_all
         wait(1)
         agent.increment('reconnect_test', 1, 5678) # triggers reconnect
-        wait(1)
-        expect(server.connect_count).to eq(2)
-        # Ensure the last command sent has been received after the reconnect attempt
-        expect(server.commands.last).to eq("increment reconnect_test 1 5678 1")
+        wait do
+          expect(server.connect_count).to eq(2)
+          # Ensure the last command sent has been received after the reconnect attempt
+          expect(server.commands.last).to eq("increment reconnect_test 1 5678 1")
+        end
       end
       context 'not listening' do
@@ -370,7 +440,7 @@ shared_examples "Instrumental Agent" do
           agent.increment('reconnect_test', 1, 1234)
           wait
           # The agent should not have sent the metric yet, the server is not responding
-          expect(agent.queue.pop(true)).to include("increment reconnect_test 1 1234 1\n")
+          expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
         end
         it "should warn once when buffer is full" do
@@ -405,55 +475,124 @@ shared_examples "Instrumental Agent" do
           agent.increment('reconnect_test', 1, 1234)
           wait
           # Since server hasn't responded to hello or authenticate, worker thread will not send data
-          expect(agent.queue.pop(true)).to include("increment reconnect_test 1 1234 1\n")
+          expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
         end
       end
       context 'server hangup' do
         it "should cancel the worker thread when the host has hung up" do
           # Start the background agent thread and let it send one metric successfully
-          agent.gauge('connection_failure', 1, 1234)
-          wait
+          agent.gauge('connection_failure1', 1, 1234)
+          wait do
+            expect(server.commands.grep(/connection_failure/).size).to eq(1)
+          end
           # Stop the server
           server.stop
           wait
           # Send one metric to the stopped server
-          agent.gauge('connection_failure', 1, 1234)
-          wait
+          agent.gauge('connection_failure2', 1, 1234)
           # The agent thread should have stopped running since the network write would
           # have failed. The queue will still contain the metric that has yet to be sent
-          expect(agent.send(:running?)).to eq(false)
-          expect(agent.queue.size).to eq(1)
+          wait do
+            expect(agent.send(:running?)).to eq(false)
+          end
+          expect(agent.sender_queue.size).to eq(1)
         end
         it "should restart the worker thread after hanging it up during an unreachable host event" do
           # Start the background agent thread and let it send one metric successfully
           agent.gauge('connection_failure', 1, 1234)
-          wait
+          wait do
+            expect(server.commands.grep(/connection_failure/).size).to eq(1)
+          end
           # Stop the server
           server.stop
           wait
           # Send one metric to the stopped server
           agent.gauge('connection_failure', 1, 1234)
-          wait
           # The agent thread should have stopped running since the network write would
           # have failed. The queue will still contain the metric that has yet to be sent
-          expect(agent.send(:running?)).to eq(false)
-          expect(agent.queue.size).to eq(1)
-          wait
+          wait do
+            expect(agent.send(:running?)).to eq(false)
+          end
+          expect(agent.sender_queue.size).to eq(1)
           # Start the server back up again
           server.listen
-          wait
           # Sending another metric should kickstart the background worker thread
           agent.gauge('connection_failure', 1, 1234)
-          wait
           # The agent should now be running the background thread, and the queue should be empty
-          expect(agent.send(:running?)).to eq(true)
-          expect(agent.queue.size).to eq(0)
+          wait do
+            expect(agent.send(:running?)).to eq(true)
+            expect(agent.sender_queue.size).to eq(0)
+          end
         end
-      end
+        it "should restart the worker thread after hanging it up during a bad ssl handshake event" do
+          # Start the background agent thread and let it send one metric successfully
+          agent.gauge('connection_failure', 1, 1234)
+          wait do
+            expect(server.commands.grep(/connection_failure/).size).to eq(1)
+          end
+          # Make the agent return the relevant exception on the next connection test
+          test_connection_fail = true
+          tc = agent.method(:test_connection)
+          allow(agent).to receive(:test_connection) do |*args, &block|
+            test_connection_fail ? raise(OpenSSL::SSL::SSLError.new) : tc.call(*args)
+          end
+          # Send one metric to the agent
+          agent.gauge('connection_failure', 1, 1234)
+          # The agent thread should have stopped running since the network write would
+          # have failed.
+          wait do
+            expect(agent.send(:running?)).to eq(false)
+          end
+          # The command is not in the queue
+          expect(agent.sender_queue.size).to eq(0)
+          # allow the agent to behave normally
+          test_connection_fail = false
+          # Sending another metric should kickstart the background worker thread
+          agent.gauge('connection_failure', 1, 1234)
+          # The agent should now be running the background thread, and the queue should be empty
+          wait do
+            expect(agent.send(:running?)).to eq(true)
+            expect(agent.sender_queue.size).to eq(0)
+            expect(server.commands.grep(/connection_failure/).size).to eq(2)
+          end
+        end
+        it "should accurately count failures so that backoff can work as intended" do
+          # Start the background agent thread and let it send one metric successfully
+          agent.gauge('connection_failure', 1, 1234)
+          wait do
+            expect(server.commands.grep(/connection_failure/).size).to eq(1)
+          end
+          # configure test_connection to fail in a way that won't kill the inner loop
+          test_connection_fail = true
+          tc = agent.method(:test_connection)
+          allow(agent).to receive(:test_connection) do |*args, &block|
+            test_connection_fail ? raise("test_connection_fail") : tc.call(*args)
+          end
+          # send some metrics
+          agent.gauge('connection_failure_1', 1, 1234)
+          agent.gauge('connection_failure_2', 1, 1234)
+          agent.gauge('connection_failure_3', 1, 1234)
+          wait do
+            expect(agent.instance_variable_get(:@failures)).to be > 0
+            expect(agent.sender_queue.size).to be > 0
+          end
+          # let the loop proceed
+          test_connection_fail = false
+          wait do
+            expect(agent.send(:running?)).to eq(true)
+            expect(agent.sender_queue.size).to eq(0)
+          end
+        end
+      end
       context 'not authenticating' do
         # Server will fail all authentication attempts
@@ -463,7 +602,7 @@ shared_examples "Instrumental Agent" do
           agent.increment('reconnect_test', 1, 1234)
           wait
           # Metrics should not have been sent since all authentication failed
-          expect(agent.queue.pop(true)).to include("increment reconnect_test 1 1234 1\n")
+          expect(agent.sender_queue.pop(true).first.to_s).to eq("increment reconnect_test 1 1234 1")
         end
       end
@@ -500,7 +639,7 @@ shared_examples "Instrumental Agent" do
         it "should not wait to exit a process if there are no commands queued" do
           allow(agent).to receive(:open_socket) { |*args, &block| sleep(5) && block.call }
           with_constants('Instrumental::Agent::EXIT_FLUSH_TIMEOUT' => 3) do
-            if (pid = fork { agent.increment('foo', 1); agent.queue.clear })
+            if (pid = fork { agent.increment('foo', 1); agent.sender_queue.clear })
               tm = Time.now.to_f
               Process.wait(pid)
               diff = Time.now.to_f - tm
@@ -512,11 +651,19 @@ shared_examples "Instrumental Agent" do
       it "should not wait longer than EXIT_FLUSH_TIMEOUT to attempt flushing the socket when disconnecting" do
         agent.increment('foo', 1)
-        wait
+        wait do
+          expect(server.commands.grep(/foo/).size).to eq(1)
+        end
         expect(agent).to receive(:flush_socket) do
           r, w = IO.pipe
-          Thread.new do
-            IO.select([r]) # mimic an endless blocking select poll
+          Thread.new do # JRuby requires extra thread here according to e9bb707e
+            begin
+              IO.select([r]) # mimic an endless blocking select poll
+            rescue Object => ex
+              # This rescue-raise prevents JRuby from printing a backtrace at
+              # the end of the run complaining about an exception in this thread.
+              raise
+            end
           end.join
         end
         with_constants('Instrumental::Agent::EXIT_FLUSH_TIMEOUT' => 3) do
@@ -578,7 +725,7 @@ shared_examples "Instrumental Agent" do
           expect(agent.send(:running?)).to eq(true)
           # Setup a failure for the next command so we'll break out of the inner
-          # loop in run_worker_loop causing another call to open_socket
+          # loop in run_sender_loop causing another call to open_socket
           test_connection_fail = true
           tc = agent.method(:test_connection)
           allow(agent).to receive(:test_connection) { |*args, &block| test_connection_fail ? raise("fail") : tc.call(*args) }
@@ -629,6 +776,329 @@ shared_examples "Instrumental Agent" do
         end
       end
     end
+    describe Instrumental::Agent, "metrician" do
+      context "enabled" do
+        let(:metrician) { true }
+        it "is enabled by default" do
+          a = agent # reference the agent first so it exists before Metrician is inspected
+          expect(Metrician.agent).to eq(a)
+        end
+        it "uses agent logger" do
+          new_logger = double
+          agent.logger = new_logger
+          expect(Metrician.logger).to eq(new_logger)
+        end
+      end
+      context "disabled" do
+        let(:metrician) { false }
+        it "can be disabled" do
+          expect(Metrician).to_not receive(:activate)
+          Instrumental::Agent.new('test-token', :metrician => false) # built only for its side effects; no local needed
+        end
+      end
+    end
+    describe Instrumental::Agent, "aggregation" do
+      context "aggregation enabled" do
+        let(:frequency) { 2 }
+        it "can be enabled at Agent.new time" do
+          expect(agent.frequency).to eq(2)
+        end
+        it "can be modified by setting the agent frequency" do
+          agent.frequency = 15
+          expect(agent.frequency).to eq(15)
+        end
+        it "is disabled by default" do
+          agent = Instrumental::Agent.new('test_token')
+          expect(agent.frequency.to_f).to eq(0)
+        end
+        it "should only allow frequencies that align with minutes" do
+          (-5..100).each do |freq|
+            agent.frequency = freq
+            expect(Instrumental::Agent::VALID_FREQUENCIES).to include(agent.frequency)
+          end
+        end
+        it "bypasses aggregator queue entirely for most commands when frequency == 0" do
+          agent.frequency = 0 # 0 turns aggregation off, so no EventAggregator should be built below
+          expect(EventAggregator).not_to receive(:new)
+          agent.increment('a_metric')
+        end
+        it "adds data to the event aggregator and does not immediately send it" do
+          Timecop.travel start_of_minute
+          agent.increment('test')
+          wait do
+            expect(agent.instance_variable_get(:@event_aggregator).size).to eq(1)
+            expect(agent.instance_variable_get(:@event_aggregator).values.values.first.metric).to eq('test')
+          end
+        end
+        it "batches data before sending" do
+          Timecop.freeze do
+            agent.increment('a_metric')
+            agent.increment('a_metric')
+            agent.increment('another_metric')
+          end
+          agent.flush(true)
+          wait do
+            expect(server.commands.grep(/_metric/).size).to eq(2)
+            aggregated_metric = server.commands.grep(/a_metric/).first.split(" ")
+            expect(aggregated_metric[2].to_i).to eq(2) # value
+            expect(aggregated_metric[4].to_i).to eq(2) # count
+          end
+        end
+        it "aggregates to the specified frequency within the aggregator" do
+          Timecop.travel(start_of_minute)
+          agent.frequency = 15
+          expect(agent.frequency).not_to be(Instrumental::Agent::DEFAULT_FREQUENCY)
+          agent.increment('metric', 1, Time.at(0))
+          # will get aligned to the closest frequency (15)
+          agent.increment('metric', 1, Time.at(20))
+          wait do
+            expect(agent.instance_variable_get(:@event_aggregator).values.keys).to eq(["metric:0", "metric:15"])
+          end
+          agent.flush
+          wait do
+            expect(server.commands.grep(/metric 1 0/).size).to eq(1)
+            expect(server.commands.grep(/metric 1 15/).size).to eq(1)
+          end
+        end
+        it "flushes data from both queues before sending" do
+          Timecop.freeze do
+            100.times do |i|
+              agent.increment("test_metric_#{i}")
+              agent.increment("other_metric")
+            end
+          end
+          expect(agent.instance_variable_get(:@aggregator_queue).size).to be > 0
+          agent.flush
+          expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
+          expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
+          wait do
+            expect(server.commands.grep(/test_metric/).size).to eq(100)
+            expect(server.commands.grep(/other_metric/).size).to eq(1)
+          end
+        end
+        it "does not batch notices" do
+          agent.frequency = 60
+          agent.notice "things are happening", 0, 100
+          agent.notice "things are happening", 0, 100
+          agent.notice "things are happening", 0, 100
+          wait do
+            expect(server.commands.grep(/things are happening/).size).to eq(3)
+          end
+        end
+        it "can be disabled by setting frequency to nil" do
+          agent.frequency = nil
+          expect(EventAggregator).not_to receive(:new)
+          agent.increment('metric')
+          wait do
+            expect(server.commands.grep(/metric/).size).to eq(1)
+          end
+        end
+        it "can be disabled by setting frequency to 0" do
+          agent.frequency = 0
+          expect(EventAggregator).not_to receive(:new)
+          agent.increment('metric')
+          wait do
+            expect(server.commands.grep(/metric/).size).to eq(1)
+          end
+        end
+        it "automatically uses the highest-without-going-over frequency for a bad frequency" do
+          agent.frequency = 17
+          expect(agent.frequency).to eq(15)
+          agent.frequency = 69420
+          expect(agent.frequency).to eq(60)
+          agent.frequency = 0
+          expect(agent.frequency).to eq(0)
+          agent.frequency = -1
+          expect(agent.frequency).to eq(0)
+        end
+        it "can take strings as frequency" do
+          agent = Instrumental::Agent.new('test_token', :frequency => "15")
+          expect(agent.frequency).to eq(15)
+        end
+        it "should not be enabled at the same time as synchronous" do
+          expect(Instrumental::Agent.logger).to receive(:warn).with(/Synchronous and Frequency should not be enabled at the same time! Defaulting to synchronous mode./)
+          agent = Instrumental::Agent.new('test_token', :synchronous => true, :frequency => 6)
+          expect(agent.synchronous).to eq(true)
+          expect(agent.frequency).to eq(0)
+        end
+        it "should use synchronous mode if it is enabled, even if turned on after frequency set at start" do
+          agent.increment('metric')
+          agent.increment('metric')
+          agent.synchronous = true
+          agent.increment('metric')
+          wait do
+            expect(server.commands.grep(/metric 1/).size).to eq(1)
+          end
+          agent.flush
+          wait do
+            expect(server.commands.grep(/metric 1/).size).to eq(1)
+            expect(server.commands.grep(/metric 2/).size).to eq(1)
+          end
+        end
+        it "sends aggregated metrics after specified frequency, even if no flush is sent" do
+          agent.frequency = 1 # shortest window, so the example only has to outlast one period
+          Timecop.travel(start_of_minute)
+          agent.increment('metric')
+          agent.increment('metric')
+          agent.gauge('other', 1)
+          agent.gauge('other', 1)
+          agent.gauge('other', 1)
+          sleep(0.5) # still inside the first window: nothing should have reached the server yet
+          wait { expect(server.commands.grep(/metric/).size).to eq(0) }
+          sleep(0.51) # total sleep > 1 frequency
+          wait { expect(server.commands.grep(/metric 2/).size).to eq(1) } # poll rather than assert once, so a slow sender thread does not flake
+          wait { expect(server.commands.grep(/other 3/).size).to eq(1) }
+        end
+        # this test really relies on the worker threads not working unexpectedly
+        it "will overflow if the aggregator queue is full" do
+          Timecop.travel(start_of_minute)
+          with_constants('Instrumental::Agent::MAX_BUFFER' => 3) do
+            allow(agent.logger).to receive(:debug)
+            expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_test 4 300 1")
+            agent.increment('overflow_test', 4, 300, 1)
+            agent.increment('overflow_test', 4, 300, 1)
+            agent.increment('overflow_test', 4, 300, 1)
+            agent.increment('overflow_test', 4, 300, 1)
+            expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(3)
+            agent.flush
+            expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
+          end
+        end
+        it "if aggregator is at max size, next command will force a forward to the sender thread" do
+          Timecop.travel(start_of_minute)
+          with_constants('Instrumental::Agent::MAX_AGGREGATOR_SIZE' => 3) do
+            agent.increment('overflow_test1')
+            agent.increment('overflow_test2')
+            agent.increment('overflow_test3')
+            agent.increment('overflow_test4')
+            agent.increment('overflow_test5')
+            # only 1 because the 5th command triggers a forward of the first 4
+            wait do
+              expect(agent.instance_variable_get(:@event_aggregator).size).to eq(1)
+            end
+            agent.flush
+            wait do
+              expect(server.commands.grep(/overflow_test/).size).to eq(5)
+            end
+          end
+        end
+        context do
+          let(:listen) { false }
+          it "will not send aggregators to the sender queue if the sender thread is not ready" do
+            Timecop.travel(start_of_minute)
+            agent.frequency = 1
+            with_constants('Instrumental::Agent::MAX_BUFFER' => 3,
+                          'Instrumental::Agent::MAX_AGGREGATOR_SIZE' => 4) do
+              # fill the queue
+              agent.increment('overflow_test1')
+              agent.increment('overflow_test2')
+              agent.increment('overflow_test3')
+              # wait until they are all in the aggregator
+              wait do
+                expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
+                expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
+                expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
+              end
+              # fill the queue again
+              agent.increment('overflow_test1')
+              agent.increment('overflow_test2')
+              agent.increment('overflow_test3')
+              # wait until they are all in the aggregator
+              wait do
+                expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
+                expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
+                expect(agent.instance_variable_get(:@sender_queue).size).to eq(0)
+              end
+              # wait for the aggregator to get forwarded and popped by the sender
+              wait do
+                expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
+                expect(agent.instance_variable_get(:@event_aggregator)).to eq(nil)
+                expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
+              end
+              # fill the queue again
+              agent.increment('overflow_test4')
+              agent.increment('overflow_test5')
+              agent.increment('overflow_test6')
+              # wait for them all to be in the aggregator
+              wait do
+                expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(0)
+                expect(agent.instance_variable_get(:@event_aggregator).size).to eq(3)
+                expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
+              end
+              # sleep until the next forward is done
+              sleep(agent.frequency + 0.1)
+              # fill the queue again
+              agent.increment('overflow_test7')
+              agent.increment('overflow_test8')
+              agent.increment('overflow_test9')
+              # because sending is blocked, the previous aggregator was never sent
+              # when it hits max size, the aggregator queue starts backing up
+              wait do
+                expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(1)
+                expect(agent.instance_variable_get(:@event_aggregator).size).to eq(5)
+                expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
+              end
+              # send 3 more items, to overflow the aggregator queue
+              allow(agent.logger).to receive(:debug)
+              expect(agent.logger).to receive(:debug).with("Dropping command, queue full(3): increment overflow_testc 4 300 1")
+              agent.increment('overflow_testa')
+              agent.increment('overflow_testb')
+              agent.increment('overflow_testc', 4, 300, 1) # will get dropped
+              wait do
+                expect(agent.instance_variable_get(:@aggregator_queue).size).to eq(3)
+                expect(agent.instance_variable_get(:@event_aggregator).size).to eq(5)
+                expect(agent.instance_variable_get(:@sender_queue).size).to eq(1)
+              end
+            end
+          end
+        end
+      end
+    end
   end
 end