josephruscio-aggregate 0.0.1 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README +2 -0
- data/{aggregate.rb → lib/aggregate.rb} +38 -25
- data/test/ts_aggregate.rb +134 -0
- metadata +7 -4
data/README
ADDED
@@ -37,13 +37,17 @@ class Aggregate
|
|
37
37
|
@outliers_high = 0
|
38
38
|
|
39
39
|
# If the user asks we maintain a linear histogram
|
40
|
-
|
41
|
-
|
42
|
-
#
|
43
|
-
if high
|
40
|
+
if (nil != low && nil != high && nil != width)
|
41
|
+
|
42
|
+
#Validate linear specification
|
43
|
+
if high <= low
|
44
44
|
raise ArgumentError, "High bucket must be > Low bucket"
|
45
45
|
end
|
46
46
|
|
47
|
+
if high - low < width
|
48
|
+
raise ArgumentError, "Histogram width must be <= histogram range"
|
49
|
+
end
|
50
|
+
|
47
51
|
@low = low
|
48
52
|
@high = high
|
49
53
|
@width = width
|
@@ -63,10 +67,9 @@ class Aggregate
|
|
63
67
|
if 0 == @count
|
64
68
|
@min = data
|
65
69
|
@max = data
|
66
|
-
|
67
|
-
@max = data
|
68
|
-
|
69
|
-
@min = data
|
70
|
+
else
|
71
|
+
@max = [data, @max].max
|
72
|
+
@min = [data, @min].min
|
70
73
|
end
|
71
74
|
|
72
75
|
# Update the running info
|
@@ -85,6 +88,14 @@ class Aggregate
|
|
85
88
|
def std_dev
|
86
89
|
end
|
87
90
|
|
91
|
+
# Combine two aggregates
|
92
|
+
#def +(b)
|
93
|
+
# a = self
|
94
|
+
# c = Aggregate.new
|
95
|
+
|
96
|
+
# c.count = a.count + b.count
|
97
|
+
#end
|
98
|
+
|
88
99
|
#Generate a pretty-printed ASCII representation of the histogram
|
89
100
|
def to_s
|
90
101
|
#Find the largest bucket and create an array of the rows we intend to print
|
@@ -182,26 +193,24 @@ class Aggregate
|
|
182
193
|
|
183
194
|
def to_bucket(index)
|
184
195
|
if linear?
|
185
|
-
return @low + (
|
196
|
+
return @low + (index * @width)
|
186
197
|
else
|
187
198
|
return 2**(index)
|
188
199
|
end
|
189
200
|
end
|
190
201
|
|
191
|
-
def right_bucket?
|
192
|
-
bucket = to_bucket(index)
|
202
|
+
def right_bucket? index, data
|
193
203
|
|
194
|
-
#
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
prev_bucket = to_bucket(index - 1)
|
199
|
-
end
|
204
|
+
# check invariant
|
205
|
+
raise unless linear?
|
206
|
+
|
207
|
+
bucket = to_bucket(index)
|
200
208
|
|
201
|
-
#It's the right bucket if data falls between
|
202
|
-
|
209
|
+
#It's the right bucket if data falls between bucket and next bucket
|
210
|
+
bucket <= data && data < bucket + @width
|
203
211
|
end
|
204
212
|
|
213
|
+
=begin
|
205
214
|
def find_bucket(lower, upper, target)
|
206
215
|
#Classic binary search
|
207
216
|
return upper if right_bucket?(upper, target)
|
@@ -216,20 +225,24 @@ class Aggregate
|
|
216
225
|
return find_bucket(middle, upper, target)
|
217
226
|
end
|
218
227
|
end
|
228
|
+
=end
|
219
229
|
|
220
230
|
# A data point is added to the bucket[n] where the data point
|
221
231
|
# is greater than or equal to the value represented by bucket[n], but
|
222
232
|
# less than the value represented by bucket[n+1]
|
223
233
|
def to_index (data)
|
224
234
|
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
235
|
+
# basic case is simple
|
236
|
+
return log2(data).to_i if !linear?
|
237
|
+
|
238
|
+
# Search for the right bucket in the linear case
|
239
|
+
@buckets.each_with_index do |count, idx|
|
240
|
+
return idx if right_bucket?(idx, data)
|
231
241
|
end
|
242
|
+
#find_bucket(0, bucket_count-1, data)
|
232
243
|
|
244
|
+
#Should not get here
|
245
|
+
raise "#{data}"
|
233
246
|
end
|
234
247
|
|
235
248
|
# log2(x) returns j, | i = j-1 and 2**i <= data < 2**j
|
@@ -0,0 +1,134 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'lib/aggregate'
|
3
|
+
|
4
|
+
class SimpleStatsTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def setup
|
7
|
+
@stats = Aggregate.new
|
8
|
+
|
9
|
+
@@DATA.each do |x|
|
10
|
+
@stats << x
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_stats_count
|
15
|
+
assert_equal @@DATA.length, @stats.count
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_stats_min_max
|
19
|
+
sorted_data = @@DATA.sort
|
20
|
+
|
21
|
+
assert_equal sorted_data[0], @stats.min
|
22
|
+
assert_equal sorted_data.last, @stats.max
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_stats_mean
|
26
|
+
sum = 0
|
27
|
+
@@DATA.each do |x|
|
28
|
+
sum += x
|
29
|
+
end
|
30
|
+
|
31
|
+
assert_equal sum.to_f/@@DATA.length.to_f, @stats.mean
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_bucket_counts
|
35
|
+
|
36
|
+
#Test each iterator
|
37
|
+
total_bucket_sum = 0
|
38
|
+
i = 0
|
39
|
+
@stats.each do |bucket, count|
|
40
|
+
assert_equal 2**i, bucket
|
41
|
+
|
42
|
+
total_bucket_sum += count
|
43
|
+
i += 1
|
44
|
+
end
|
45
|
+
|
46
|
+
assert_equal total_bucket_sum, @@DATA.length
|
47
|
+
|
48
|
+
#Test each_nonzero iterator
|
49
|
+
prev_bucket = 0
|
50
|
+
total_bucket_sum = 0
|
51
|
+
@stats.each_nonzero do |bucket, count|
|
52
|
+
assert bucket > prev_bucket
|
53
|
+
assert_not_equal count, 0
|
54
|
+
|
55
|
+
total_bucket_sum += count
|
56
|
+
end
|
57
|
+
|
58
|
+
assert_equal total_bucket_sum, @@DATA.length
|
59
|
+
end
|
60
|
+
|
61
|
+
=begin
|
62
|
+
def test_addition
|
63
|
+
stats1 = Aggregate.new
|
64
|
+
stats2 = Aggregate.new
|
65
|
+
|
66
|
+
stats1 << 1
|
67
|
+
stats2 << 3
|
68
|
+
|
69
|
+
stats_sum = stats1 + stats2
|
70
|
+
|
71
|
+
assert_equal stats_sum.count, stats1.count + stats2.count
|
72
|
+
end
|
73
|
+
=end
|
74
|
+
|
75
|
+
#XXX: Update test_bucket_contents() if you muck with @@DATA
|
76
|
+
@@DATA = [ 1, 5, 4, 6, 1028, 1972, 16384, 16385, 16383 ]
|
77
|
+
def test_bucket_contents
|
78
|
+
#XXX: This is the only test so far that cares about the actual contents
|
79
|
+
# of @@DATA, so if you update that array ... update this method too
|
80
|
+
expected_buckets = [1, 4, 1024, 8192, 16384]
|
81
|
+
expected_counts = [1, 3, 2, 1, 2]
|
82
|
+
|
83
|
+
i = 0
|
84
|
+
@stats.each_nonzero do |bucket, count|
|
85
|
+
assert_equal expected_buckets[i], bucket
|
86
|
+
assert_equal expected_counts[i], count
|
87
|
+
# Increment for the next test
|
88
|
+
i += 1
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_histogram
|
93
|
+
puts @stats.to_s
|
94
|
+
end
|
95
|
+
|
96
|
+
def test_outlier
|
97
|
+
@stats << -1
|
98
|
+
@stats << 2**129
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
class LinearHistogramTest < Test::Unit::TestCase
|
103
|
+
def setup
|
104
|
+
@stats = Aggregate.new(0, 32768, 1024)
|
105
|
+
|
106
|
+
@@DATA.each do |x|
|
107
|
+
@stats << x
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
def test_validation
|
112
|
+
assert_raise(ArgumentError) {bad_stats = Aggregate.new(32,32,4)}
|
113
|
+
assert_raise(ArgumentError) {bad_stats = Aggregate.new(32,16,4)}
|
114
|
+
assert_raise(ArgumentError) {bad_stats = Aggregate.new(16,32,17)}
|
115
|
+
end
|
116
|
+
|
117
|
+
#XXX: Update test_bucket_contents() if you muck with @@DATA
|
118
|
+
@@DATA = [ 1, 5, 4, 6, 1028, 1972, 16384, 16385, 16383 ]
|
119
|
+
def test_bucket_contents
|
120
|
+
#XXX: This is the only test so far that cares about the actual contents
|
121
|
+
# of @@DATA, so if you update that array ... update this method too
|
122
|
+
expected_buckets = [0, 1024, 15360, 16384]
|
123
|
+
expected_counts = [4, 2, 1, 2]
|
124
|
+
|
125
|
+
i = 0
|
126
|
+
@stats.each_nonzero do |bucket, count|
|
127
|
+
assert_equal expected_buckets[i], bucket
|
128
|
+
assert_equal expected_counts[i], count
|
129
|
+
# Increment for the next test
|
130
|
+
i += 1
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: josephruscio-aggregate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joseph Ruscio
|
@@ -20,10 +20,13 @@ executables: []
|
|
20
20
|
extensions: []
|
21
21
|
|
22
22
|
extra_rdoc_files:
|
23
|
+
- README
|
23
24
|
- LICENSE
|
24
25
|
files:
|
25
|
-
-
|
26
|
+
- README
|
26
27
|
- LICENSE
|
28
|
+
- lib/aggregate.rb
|
29
|
+
- test/ts_aggregate.rb
|
27
30
|
has_rdoc: true
|
28
31
|
homepage: http://github.com/josephruscio/aggregate
|
29
32
|
licenses:
|
@@ -52,5 +55,5 @@ rubygems_version: 1.3.5
|
|
52
55
|
signing_key:
|
53
56
|
specification_version: 2
|
54
57
|
summary: Aggregate is a Ruby library accumulating aggregate statistics (including histograms) in an object oriented manner.
|
55
|
-
test_files:
|
56
|
-
|
58
|
+
test_files:
|
59
|
+
- test/ts_aggregate.rb
|