josephruscio-aggregate 0.0.1 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,2 @@
1
+ Aggregate is a Ruby implementation of a statistics aggregator, including histogram support.
2
+
@@ -37,13 +37,17 @@ class Aggregate
37
37
  @outliers_high = 0
38
38
 
39
39
  # If the user asks we maintain a linear histogram
40
- # STILL UNDER TEST/DEV
41
- if false #(nil != low && nil != high && nil != width)
42
- # This is a linear histogram
43
- if high < low
40
+ if (nil != low && nil != high && nil != width)
41
+
42
+ #Validate linear specification
43
+ if high <= low
44
44
  raise ArgumentError, "High bucket must be > Low bucket"
45
45
  end
46
46
 
47
+ if high - low < width
48
+ raise ArgumentError, "Histogram width must be <= histogram range"
49
+ end
50
+
47
51
  @low = low
48
52
  @high = high
49
53
  @width = width
@@ -63,10 +67,9 @@ class Aggregate
63
67
  if 0 == @count
64
68
  @min = data
65
69
  @max = data
66
- elsif data > @max
67
- @max = data
68
- elsif data < @min
69
- @min = data
70
+ else
71
+ @max = [data, @max].max
72
+ @min = [data, @min].min
70
73
  end
71
74
 
72
75
  # Update the running info
@@ -85,6 +88,14 @@ class Aggregate
85
88
  def std_dev
86
89
  end
87
90
 
91
+ # Combine two aggregates
92
+ #def +(b)
93
+ # a = self
94
+ # c = Aggregate.new
95
+
96
+ # c.count = a.count + b.count
97
+ #end
98
+
88
99
  #Generate a pretty-printed ASCII representation of the histogram
89
100
  def to_s
90
101
  #Find the largest bucket and create an array of the rows we intend to print
@@ -182,26 +193,24 @@ class Aggregate
182
193
 
183
194
  def to_bucket(index)
184
195
  if linear?
185
- return @low + ( (index + 1) * @width)
196
+ return @low + (index * @width)
186
197
  else
187
198
  return 2**(index)
188
199
  end
189
200
  end
190
201
 
191
- def right_bucket?(index, data)
192
- bucket = to_bucket(index)
202
+ def right_bucket? index, data
193
203
 
194
- # Edge case
195
- if 0 == index
196
- prev_bucket = @low
197
- else
198
- prev_bucket = to_bucket(index - 1)
199
- end
204
+ # check invariant
205
+ raise unless linear?
206
+
207
+ bucket = to_bucket(index)
200
208
 
201
- #It's the right bucket if data falls between prev_bucket and bucket
202
- prev_bucket <= data && data <= bucket
209
+ #It's the right bucket if data falls between bucket and next bucket
210
+ bucket <= data && data < bucket + @width
203
211
  end
204
212
 
213
+ =begin
205
214
  def find_bucket(lower, upper, target)
206
215
  #Classic binary search
207
216
  return upper if right_bucket?(upper, target)
@@ -216,20 +225,24 @@ class Aggregate
216
225
  return find_bucket(middle, upper, target)
217
226
  end
218
227
  end
228
+ =end
219
229
 
220
230
  # A data point is added to the bucket[n] where the data point
221
231
  # is greater than or equal to the value represented by bucket[n], but
222
232
  # less than the value represented by bucket[n+1]
223
233
  def to_index (data)
224
234
 
225
- if linear?
226
- find_bucket(0, bucket_count-1, data)
227
- else
228
- #log2 returns the bucket above the one we want,
229
- #and we need to also subtract for 0 indexing of Array
230
- log2(data).to_i
235
+ # basic case is simple
236
+ return log2(data).to_i if !linear?
237
+
238
+ # Search for the right bucket in the linear case
239
+ @buckets.each_with_index do |count, idx|
240
+ return idx if right_bucket?(idx, data)
231
241
  end
242
+ #find_bucket(0, bucket_count-1, data)
232
243
 
244
+ #Should not get here
245
+ raise "#{data}"
233
246
  end
234
247
 
235
248
  # log2(x) returns j, | i = j-1 and 2**i <= data < 2**j
@@ -0,0 +1,134 @@
1
+ require 'test/unit'
2
+ require 'lib/aggregate'
3
+
4
+ class SimpleStatsTest < Test::Unit::TestCase
5
+
6
+ def setup
7
+ @stats = Aggregate.new
8
+
9
+ @@DATA.each do |x|
10
+ @stats << x
11
+ end
12
+ end
13
+
14
+ def test_stats_count
15
+ assert_equal @@DATA.length, @stats.count
16
+ end
17
+
18
+ def test_stats_min_max
19
+ sorted_data = @@DATA.sort
20
+
21
+ assert_equal sorted_data[0], @stats.min
22
+ assert_equal sorted_data.last, @stats.max
23
+ end
24
+
25
+ def test_stats_mean
26
+ sum = 0
27
+ @@DATA.each do |x|
28
+ sum += x
29
+ end
30
+
31
+ assert_equal sum.to_f/@@DATA.length.to_f, @stats.mean
32
+ end
33
+
34
+ def test_bucket_counts
35
+
36
+ #Test each iterator
37
+ total_bucket_sum = 0
38
+ i = 0
39
+ @stats.each do |bucket, count|
40
+ assert_equal 2**i, bucket
41
+
42
+ total_bucket_sum += count
43
+ i += 1
44
+ end
45
+
46
+ assert_equal total_bucket_sum, @@DATA.length
47
+
48
+ #Test each_nonzero iterator
49
+ prev_bucket = 0
50
+ total_bucket_sum = 0
51
+ @stats.each_nonzero do |bucket, count|
52
+ assert bucket > prev_bucket
53
+ assert_not_equal count, 0
54
+
55
+ total_bucket_sum += count
56
+ end
57
+
58
+ assert_equal total_bucket_sum, @@DATA.length
59
+ end
60
+
61
+ =begin
62
+ def test_addition
63
+ stats1 = Aggregate.new
64
+ stats2 = Aggregate.new
65
+
66
+ stats1 << 1
67
+ stats2 << 3
68
+
69
+ stats_sum = stats1 + stats2
70
+
71
+ assert_equal stats_sum.count, stats1.count + stats2.count
72
+ end
73
+ =end
74
+
75
+ #XXX: Update test_bucket_contents() if you muck with @@DATA
76
+ @@DATA = [ 1, 5, 4, 6, 1028, 1972, 16384, 16385, 16383 ]
77
+ def test_bucket_contents
78
+ #XXX: This is the only test so far that cares about the actual contents
79
+ # of @@DATA, so if you update that array ... update this method too
80
+ expected_buckets = [1, 4, 1024, 8192, 16384]
81
+ expected_counts = [1, 3, 2, 1, 2]
82
+
83
+ i = 0
84
+ @stats.each_nonzero do |bucket, count|
85
+ assert_equal expected_buckets[i], bucket
86
+ assert_equal expected_counts[i], count
87
+ # Increment for the next test
88
+ i += 1
89
+ end
90
+ end
91
+
92
+ def test_histogram
93
+ puts @stats.to_s
94
+ end
95
+
96
+ def test_outlier
97
+ @stats << -1
98
+ @stats << 2**129
99
+ end
100
+ end
101
+
102
+ class LinearHistogramTest < Test::Unit::TestCase
103
+ def setup
104
+ @stats = Aggregate.new(0, 32768, 1024)
105
+
106
+ @@DATA.each do |x|
107
+ @stats << x
108
+ end
109
+ end
110
+
111
+ def test_validation
112
+ assert_raise(ArgumentError) {bad_stats = Aggregate.new(32,32,4)}
113
+ assert_raise(ArgumentError) {bad_stats = Aggregate.new(32,16,4)}
114
+ assert_raise(ArgumentError) {bad_stats = Aggregate.new(16,32,17)}
115
+ end
116
+
117
+ #XXX: Update test_bucket_contents() if you muck with @@DATA
118
+ @@DATA = [ 1, 5, 4, 6, 1028, 1972, 16384, 16385, 16383 ]
119
+ def test_bucket_contents
120
+ #XXX: This is the only test so far that cares about the actual contents
121
+ # of @@DATA, so if you update that array ... update this method too
122
+ expected_buckets = [0, 1024, 15360, 16384]
123
+ expected_counts = [4, 2, 1, 2]
124
+
125
+ i = 0
126
+ @stats.each_nonzero do |bucket, count|
127
+ assert_equal expected_buckets[i], bucket
128
+ assert_equal expected_counts[i], count
129
+ # Increment for the next test
130
+ i += 1
131
+ end
132
+ end
133
+
134
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: josephruscio-aggregate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joseph Ruscio
@@ -20,10 +20,13 @@ executables: []
20
20
  extensions: []
21
21
 
22
22
  extra_rdoc_files:
23
+ - README
23
24
  - LICENSE
24
25
  files:
25
- - aggregate.rb
26
+ - README
26
27
  - LICENSE
28
+ - lib/aggregate.rb
29
+ - test/ts_aggregate.rb
27
30
  has_rdoc: true
28
31
  homepage: http://github.com/josephruscio/aggregate
29
32
  licenses:
@@ -52,5 +55,5 @@ rubygems_version: 1.3.5
52
55
  signing_key:
53
56
  specification_version: 2
54
57
  summary: Aggregate is a Ruby library for accumulating aggregate statistics (including histograms) in an object-oriented manner.
55
- test_files: []
56
-
58
+ test_files:
59
+ - test/ts_aggregate.rb