RubyGems - josephruscio-aggregate - Versions diffs - 0.0.1 → 0.0.3 - Mend

josephruscio-aggregate 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

data/README +2 -0
data/{aggregate.rb → lib/aggregate.rb} +38 -25
data/test/ts_aggregate.rb +134 -0
metadata +7 -4

data/README ADDED

	@@ -0,0 +1,2 @@
1	+ Aggregate is a ruby implementation of a statistics aggregator including histogram support
2	+

data/{aggregate.rb → lib/aggregate.rb} RENAMED

@@ -37,13 +37,17 @@ class Aggregate
     @outliers_high = 0
     # If the user asks we maintain a linear histogram
-    # STILL UNDER TEST/DEV
-    if false #(nil != low && nil != high && nil != width)
-      # This is a linear histogram
-      if high < low
+    if (nil != low && nil != high && nil != width)
+      #Validate linear specification
+      if high <= low
 	raise ArgumentError, "High bucket must be > Low bucket"
       end
+      if high - low < width
+        raise ArgumentError, "Histogram width must be <= histogram range"
+      end
       @low = low
       @high = high
       @width = width
@@ -63,10 +67,9 @@ class Aggregate
     if 0 == @count
       @min = data
       @max = data
-    elsif data > @max
-      @max = data
-    elsif data < @min
-      @min = data
+    else
+      @max = [data, @max].max
+      @min = [data, @min].min
     end
     # Update the running info
@@ -85,6 +88,14 @@ class Aggregate
   def std_dev
   end
+  # Combine two aggregates
+  #def +(b)
+  #  a = self
+  #  c = Aggregate.new
+  #  c.count = a.count + b.count
+  #end
   #Generate a pretty-printed ASCII representation of the histogram
   def to_s
     #Find the largest bucket and create an array of the rows we intend to print
@@ -182,26 +193,24 @@ class Aggregate
   def to_bucket(index)
     if linear?
-      return @low + ( (index + 1) * @width)
+      return @low + (index * @width)
     else
       return 2**(index)
     end
   end
-  def right_bucket?(index, data)
-    bucket = to_bucket(index)
+  def right_bucket? index, data
-    # Edge case
-    if 0 == index
-      prev_bucket = @low
-    else
-      prev_bucket = to_bucket(index - 1)
-    end
+    # check invariant
+    raise unless linear?
+    bucket = to_bucket(index)
-    #It's the right bucket if data falls between prev_bucket and bucket
-    prev_bucket <= data && data <= bucket
+    #It's the right bucket if data falls between bucket and next bucket
+    bucket <= data && data < bucket + @width
   end
+=begin
   def find_bucket(lower, upper, target)
     #Classic binary search
     return upper if right_bucket?(upper, target)
@@ -216,20 +225,24 @@ class Aggregate
       return find_bucket(middle, upper, target)
     end
   end
+=end
   # A data point is added to the bucket[n] where the data point
   # is less than the value represented by bucket[n], but greater
   # than the value represented by bucket[n+1]
   def to_index (data)
-    if linear?
-      find_bucket(0, bucket_count-1, data)
-    else
-      #log2 returns the bucket above the one we want,
-      #and we need to also subtract for 0 indexing of Array
-      log2(data).to_i
+    # basic case is simple
+    return log2(data).to_i if !linear?
+    # Search for the right bucket in the linear case
+    @buckets.each_with_index do |count, idx|
+      return idx if right_bucket?(idx, data)
     end
+    #find_bucket(0, bucket_count-1, data)
+    #Should not get here
+    raise "#{data}"
   end
   # log2(x) returns j, | i = j-1 and 2**i <= data < 2**j

data/test/ts_aggregate.rb ADDED

@@ -0,0 +1,134 @@
+require 'test/unit'
+require 'lib/aggregate'
+class SimpleStatsTest < Test::Unit::TestCase
+  def setup
+    @stats = Aggregate.new
+    @@DATA.each do |x|
+      @stats << x
+    end
+  end
+  def test_stats_count
+    assert_equal @@DATA.length, @stats.count
+  end
+  def test_stats_min_max
+    sorted_data = @@DATA.sort
+    assert_equal sorted_data[0], @stats.min
+    assert_equal sorted_data.last, @stats.max
+  end
+  def test_stats_mean
+    sum = 0
+    @@DATA.each do |x|
+      sum += x
+    end
+    assert_equal sum.to_f/@@DATA.length.to_f, @stats.mean
+  end
+  def test_bucket_counts
+    #Test each iterator
+    total_bucket_sum = 0
+    i = 0
+    @stats.each do |bucket, count|
+      assert_equal 2**i, bucket
+      total_bucket_sum += count
+      i += 1
+    end
+    assert_equal total_bucket_sum, @@DATA.length
+    #Test each_nonzero iterator
+    prev_bucket = 0
+    total_bucket_sum = 0
+    @stats.each_nonzero do |bucket, count|
+      assert bucket > prev_bucket
+      assert_not_equal count, 0
+      total_bucket_sum += count
+    end
+    assert_equal total_bucket_sum, @@DATA.length
+  end
+=begin
+  def test_addition
+    stats1 = Aggregate.new
+    stats2 = Aggregate.new
+    stats1 << 1
+    stats2 << 3
+    stats_sum = stats1 + stats2
+    assert_equal stats_sum.count, stats1.count + stats2.count
+  end
+=end
+  #XXX: Update test_bucket_contents() if you muck with @@DATA
+  @@DATA = [ 1, 5, 4, 6, 1028, 1972, 16384, 16385, 16383 ]
+  def test_bucket_contents
+    #XXX: This is the only test so far that cares about the actual contents
+    # of @@DATA, so if you update that array ... update this method too
+    expected_buckets  = [1, 4, 1024, 8192, 16384]
+    expected_counts =   [1, 3,    2,    1,     2]
+    i = 0
+    @stats.each_nonzero do |bucket, count|
+      assert_equal expected_buckets[i], bucket
+      assert_equal expected_counts[i],  count
+      # Increment for the next test
+      i += 1
+    end
+  end
+  def test_histogram
+    puts @stats.to_s
+  end
+  def test_outlier
+    @stats << -1
+    @stats << 2**129
+  end
+end
+class LinearHistogramTest < Test::Unit::TestCase
+  def setup
+    @stats = Aggregate.new(0, 32768, 1024)
+    @@DATA.each do |x|
+      @stats << x
+    end
+  end
+  def test_validation
+    assert_raise(ArgumentError) {bad_stats = Aggregate.new(32,32,4)}
+    assert_raise(ArgumentError) {bad_stats = Aggregate.new(32,16,4)}
+    assert_raise(ArgumentError) {bad_stats = Aggregate.new(16,32,17)}
+  end
+  #XXX: Update test_bucket_contents() if you muck with @@DATA
+  @@DATA = [ 1, 5, 4, 6, 1028, 1972, 16384, 16385, 16383 ]
+  def test_bucket_contents
+    #XXX: This is the only test so far that cares about the actual contents
+    # of @@DATA, so if you update that array ... update this method too
+    expected_buckets  = [0, 1024,  15360, 16384]
+    expected_counts =   [4, 2,     1,     2]
+    i = 0
+    @stats.each_nonzero do |bucket, count|
+      assert_equal expected_buckets[i], bucket
+      assert_equal expected_counts[i],  count
+      # Increment for the next test
+      i += 1
+    end
+  end
+end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: josephruscio-aggregate
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.3
 platform: ruby
 authors:
 - Joseph Ruscio
@@ -20,10 +20,13 @@ executables: []
 extensions: []
 extra_rdoc_files:
+- README
 - LICENSE
 files:
-- aggregate.rb
+- README
 - LICENSE
+- lib/aggregate.rb
+- test/ts_aggregate.rb
 has_rdoc: true
 homepage: http://github.com/josephruscio/aggregate
 licenses:
@@ -52,5 +55,5 @@ rubygems_version: 1.3.5
 signing_key:
 specification_version: 2
 summary: Aggregate is a Ruby library accumulating aggregate statistics (including histograms) in an object oriented manner.
-test_files: []
+test_files:
+- test/ts_aggregate.rb