josephruscio-aggregate 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,2 @@
1
+ Aggregate is a Ruby implementation of a statistics aggregator, including histogram support.
2
+
@@ -37,13 +37,17 @@ class Aggregate
37
37
  @outliers_high = 0
38
38
 
39
39
  # If the user asks we maintain a linear histogram
40
- # STILL UNDER TEST/DEV
41
- if false #(nil != low && nil != high && nil != width)
42
- # This is a linear histogram
43
- if high < low
40
+ if (nil != low && nil != high && nil != width)
41
+
42
+ #Validate linear specification
43
+ if high <= low
44
44
  raise ArgumentError, "High bucket must be > Low bucket"
45
45
  end
46
46
 
47
+ if high - low < width
48
+ raise ArgumentError, "Histogram width must be <= histogram range"
49
+ end
50
+
47
51
  @low = low
48
52
  @high = high
49
53
  @width = width
@@ -63,10 +67,9 @@ class Aggregate
63
67
  if 0 == @count
64
68
  @min = data
65
69
  @max = data
66
- elsif data > @max
67
- @max = data
68
- elsif data < @min
69
- @min = data
70
+ else
71
+ @max = [data, @max].max
72
+ @min = [data, @min].min
70
73
  end
71
74
 
72
75
  # Update the running info
@@ -85,6 +88,14 @@ class Aggregate
85
88
  def std_dev
86
89
  end
87
90
 
91
+ # Combine two aggregates
92
+ #def +(b)
93
+ # a = self
94
+ # c = Aggregate.new
95
+
96
+ # c.count = a.count + b.count
97
+ #end
98
+
88
99
  #Generate a pretty-printed ASCII representation of the histogram
89
100
  def to_s
90
101
  #Find the largest bucket and create an array of the rows we intend to print
@@ -182,26 +193,24 @@ class Aggregate
182
193
 
183
194
  def to_bucket(index)
184
195
  if linear?
185
- return @low + ( (index + 1) * @width)
196
+ return @low + (index * @width)
186
197
  else
187
198
  return 2**(index)
188
199
  end
189
200
  end
190
201
 
191
- def right_bucket?(index, data)
192
- bucket = to_bucket(index)
202
+ def right_bucket? index, data
193
203
 
194
- # Edge case
195
- if 0 == index
196
- prev_bucket = @low
197
- else
198
- prev_bucket = to_bucket(index - 1)
199
- end
204
+ # check invariant
205
+ raise unless linear?
206
+
207
+ bucket = to_bucket(index)
200
208
 
201
- #It's the right bucket if data falls between prev_bucket and bucket
202
- prev_bucket <= data && data <= bucket
209
+ #It's the right bucket if data falls between bucket and next bucket
210
+ bucket <= data && data < bucket + @width
203
211
  end
204
212
 
213
+ =begin
205
214
  def find_bucket(lower, upper, target)
206
215
  #Classic binary search
207
216
  return upper if right_bucket?(upper, target)
@@ -216,20 +225,24 @@ class Aggregate
216
225
  return find_bucket(middle, upper, target)
217
226
  end
218
227
  end
228
+ =end
219
229
 
220
230
  # A data point is added to bucket[n] when the data point is greater
221
231
  # than or equal to the value represented by bucket[n], but less
222
232
  # than the value represented by bucket[n+1]
223
233
  def to_index (data)
224
234
 
225
- if linear?
226
- find_bucket(0, bucket_count-1, data)
227
- else
228
- #log2 returns the bucket above the one we want,
229
- #and we need to also subtract for 0 indexing of Array
230
- log2(data).to_i
235
+ # basic case is simple
236
+ return log2(data).to_i if !linear?
237
+
238
+ # Search for the right bucket in the linear case
239
+ @buckets.each_with_index do |count, idx|
240
+ return idx if right_bucket?(idx, data)
231
241
  end
242
+ #find_bucket(0, bucket_count-1, data)
232
243
 
244
+ #Should not get here
245
+ raise "#{data}"
233
246
  end
234
247
 
235
248
  # log2(x) returns j, | i = j-1 and 2**i <= data < 2**j
@@ -0,0 +1,134 @@
1
+ require 'test/unit'
2
+ require 'lib/aggregate'
3
+
4
+ class SimpleStatsTest < Test::Unit::TestCase
5
+
6
+ def setup
7
+ @stats = Aggregate.new
8
+
9
+ @@DATA.each do |x|
10
+ @stats << x
11
+ end
12
+ end
13
+
14
+ def test_stats_count
15
+ assert_equal @@DATA.length, @stats.count
16
+ end
17
+
18
+ def test_stats_min_max
19
+ sorted_data = @@DATA.sort
20
+
21
+ assert_equal sorted_data[0], @stats.min
22
+ assert_equal sorted_data.last, @stats.max
23
+ end
24
+
25
+ def test_stats_mean
26
+ sum = 0
27
+ @@DATA.each do |x|
28
+ sum += x
29
+ end
30
+
31
+ assert_equal sum.to_f/@@DATA.length.to_f, @stats.mean
32
+ end
33
+
34
+ def test_bucket_counts
35
+
36
+ #Test each iterator
37
+ total_bucket_sum = 0
38
+ i = 0
39
+ @stats.each do |bucket, count|
40
+ assert_equal 2**i, bucket
41
+
42
+ total_bucket_sum += count
43
+ i += 1
44
+ end
45
+
46
+ assert_equal total_bucket_sum, @@DATA.length
47
+
48
+ #Test each_nonzero iterator
49
+ prev_bucket = 0
50
+ total_bucket_sum = 0
51
+ @stats.each_nonzero do |bucket, count|
52
+ assert bucket > prev_bucket
53
+ assert_not_equal count, 0
54
+
55
+ total_bucket_sum += count
56
+ end
57
+
58
+ assert_equal total_bucket_sum, @@DATA.length
59
+ end
60
+
61
+ =begin
62
+ def test_addition
63
+ stats1 = Aggregate.new
64
+ stats2 = Aggregate.new
65
+
66
+ stats1 << 1
67
+ stats2 << 3
68
+
69
+ stats_sum = stats1 + stats2
70
+
71
+ assert_equal stats_sum.count, stats1.count + stats2.count
72
+ end
73
+ =end
74
+
75
+ #XXX: Update test_bucket_contents() if you muck with @@DATA
76
+ @@DATA = [ 1, 5, 4, 6, 1028, 1972, 16384, 16385, 16383 ]
77
+ def test_bucket_contents
78
+ #XXX: This is the only test so far that cares about the actual contents
79
+ # of @@DATA, so if you update that array ... update this method too
80
+ expected_buckets = [1, 4, 1024, 8192, 16384]
81
+ expected_counts = [1, 3, 2, 1, 2]
82
+
83
+ i = 0
84
+ @stats.each_nonzero do |bucket, count|
85
+ assert_equal expected_buckets[i], bucket
86
+ assert_equal expected_counts[i], count
87
+ # Increment for the next test
88
+ i += 1
89
+ end
90
+ end
91
+
92
+ def test_histogram
93
+ puts @stats.to_s
94
+ end
95
+
96
+ def test_outlier
97
+ @stats << -1
98
+ @stats << 2**129
99
+ end
100
+ end
101
+
102
+ class LinearHistogramTest < Test::Unit::TestCase
103
+ def setup
104
+ @stats = Aggregate.new(0, 32768, 1024)
105
+
106
+ @@DATA.each do |x|
107
+ @stats << x
108
+ end
109
+ end
110
+
111
+ def test_validation
112
+ assert_raise(ArgumentError) {bad_stats = Aggregate.new(32,32,4)}
113
+ assert_raise(ArgumentError) {bad_stats = Aggregate.new(32,16,4)}
114
+ assert_raise(ArgumentError) {bad_stats = Aggregate.new(16,32,17)}
115
+ end
116
+
117
+ #XXX: Update test_bucket_contents() if you muck with @@DATA
118
+ @@DATA = [ 1, 5, 4, 6, 1028, 1972, 16384, 16385, 16383 ]
119
+ def test_bucket_contents
120
+ #XXX: This is the only test so far that cares about the actual contents
121
+ # of @@DATA, so if you update that array ... update this method too
122
+ expected_buckets = [0, 1024, 15360, 16384]
123
+ expected_counts = [4, 2, 1, 2]
124
+
125
+ i = 0
126
+ @stats.each_nonzero do |bucket, count|
127
+ assert_equal expected_buckets[i], bucket
128
+ assert_equal expected_counts[i], count
129
+ # Increment for the next test
130
+ i += 1
131
+ end
132
+ end
133
+
134
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: josephruscio-aggregate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joseph Ruscio
@@ -20,10 +20,13 @@ executables: []
20
20
  extensions: []
21
21
 
22
22
  extra_rdoc_files:
23
+ - README
23
24
  - LICENSE
24
25
  files:
25
- - aggregate.rb
26
+ - README
26
27
  - LICENSE
28
+ - lib/aggregate.rb
29
+ - test/ts_aggregate.rb
27
30
  has_rdoc: true
28
31
  homepage: http://github.com/josephruscio/aggregate
29
32
  licenses:
@@ -52,5 +55,5 @@ rubygems_version: 1.3.5
52
55
  signing_key:
53
56
  specification_version: 2
54
57
  summary: Aggregate is a Ruby library accumulating aggregate statistics (including histograms) in an object oriented manner.
55
- test_files: []
56
-
58
+ test_files:
59
+ - test/ts_aggregate.rb