josephruscio-aggregate 0.0.3 → 0.1.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- data/lib/aggregate.rb +46 -19
- data/test/ts_aggregate.rb +11 -0
- metadata +1 -1
data/lib/aggregate.rb
CHANGED
@@ -82,10 +82,12 @@ class Aggregate
|
|
82
82
|
end
|
83
83
|
|
84
84
|
def mean
|
85
|
-
@sum /
|
85
|
+
@sum / @count
|
86
86
|
end
|
87
87
|
|
88
|
+
#Calculate the standard deviation
|
88
89
|
def std_dev
|
90
|
+
Math.sqrt((@sum2.to_f - ((@sum.to_f * @sum.to_f)/@count.to_f)) / (@count.to_f - 1))
|
89
91
|
end
|
90
92
|
|
91
93
|
# Combine two aggregates
|
@@ -97,38 +99,55 @@ class Aggregate
|
|
97
99
|
#end
|
98
100
|
|
99
101
|
#Generate a pretty-printed ASCII representation of the histogram
|
100
|
-
def to_s
|
102
|
+
def to_s(columns=nil)
|
103
|
+
|
104
|
+
#default to an 80 column terminal, don't support < 80 for now
|
105
|
+
if nil == columns
|
106
|
+
columns = 80
|
107
|
+
else
|
108
|
+
raise ArgumentError if columns < 80
|
109
|
+
end
|
110
|
+
|
101
111
|
#Find the largest bucket and create an array of the rows we intend to print
|
102
|
-
max_count = 0
|
103
112
|
disp_buckets = Array.new
|
113
|
+
max_count = 0
|
114
|
+
total = 0
|
104
115
|
@buckets.each_with_index do |count, idx|
|
105
116
|
next if 0 == count
|
106
|
-
max_count =
|
117
|
+
max_count = [max_count, count].max
|
107
118
|
disp_buckets << [idx, to_bucket(idx), count]
|
119
|
+
total += count
|
108
120
|
end
|
109
121
|
|
110
122
|
#Figure out how wide the value and count columns need to be based on their
|
111
123
|
#largest respective numbers
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
max_bar_width.times { header += "-"}
|
120
|
-
header += " count"
|
124
|
+
value_str = "value"
|
125
|
+
count_str = "count"
|
126
|
+
total_str = "Total"
|
127
|
+
value_width = [disp_buckets.last[1].to_s.length, value_str.length].max
|
128
|
+
value_width = [value_width, total_str.length].max
|
129
|
+
count_width = [total.to_s.length, count_str.length].max
|
130
|
+
max_bar_width = columns - (value_width + " |".length + "| ".length + count_width)
|
121
131
|
|
122
132
|
#Determine the value of a '@'
|
123
133
|
weight = [max_count.to_f/max_bar_width.to_f, 1.0].max
|
124
134
|
|
135
|
+
#format the header
|
136
|
+
histogram = sprintf("%#{value_width}s |", value_str)
|
137
|
+
max_bar_width.times { histogram << "-"}
|
138
|
+
histogram << sprintf("| %#{count_width}s\n", count_str)
|
139
|
+
|
140
|
+
# We denote empty buckets with a '~'
|
141
|
+
def skip_row(value_width)
|
142
|
+
sprintf("%#{value_width}s ~\n", " ")
|
143
|
+
end
|
144
|
+
|
125
145
|
#Loop through each bucket to be displayed and output the correct number
|
126
|
-
histogram = ""
|
127
146
|
prev_index = disp_buckets[0][0] - 1
|
147
|
+
|
128
148
|
disp_buckets.each do |x|
|
129
|
-
|
130
149
|
#Denote skipped empty buckets with a ~
|
131
|
-
histogram
|
150
|
+
histogram << skip_row(value_width) unless prev_index == x[0] - 1
|
132
151
|
prev_index = x[0]
|
133
152
|
|
134
153
|
#Add the value
|
@@ -140,14 +159,22 @@ class Aggregate
|
|
140
159
|
(max_bar_width - bar_size).times { row += " " }
|
141
160
|
|
142
161
|
#Add the count
|
143
|
-
row
|
162
|
+
row << sprintf("| %#{count_width}d\n", x[2])
|
144
163
|
|
145
164
|
#Append the finished row onto the histogram
|
146
|
-
histogram
|
165
|
+
histogram << row
|
147
166
|
end
|
148
167
|
|
168
|
+
#End the table
|
169
|
+
histogram << skip_row(value_width) if disp_buckets.last[0] != bucket_count-1
|
170
|
+
histogram << sprintf("%#{value_width}s", "Total")
|
171
|
+
histogram << " |"
|
172
|
+
max_bar_width.times {histogram << "-"}
|
173
|
+
histogram << "| "
|
174
|
+
histogram << sprintf("%#{count_width}d\n", total)
|
175
|
+
|
149
176
|
#Put the pieces together
|
150
|
-
"\n" +
|
177
|
+
"\n" + histogram
|
151
178
|
end
|
152
179
|
|
153
180
|
#Iterate through each bucket in the histogram regardless of
|
data/test/ts_aggregate.rb
CHANGED
@@ -94,8 +94,19 @@ class SimpleStatsTest < Test::Unit::TestCase
|
|
94
94
|
end
|
95
95
|
|
96
96
|
def test_outlier
|
97
|
+
assert_equal 0, @stats.outliers_low
|
98
|
+
assert_equal 0, @stats.outliers_high
|
99
|
+
|
97
100
|
@stats << -1
|
101
|
+
@stats << -2
|
98
102
|
@stats << 2**129
|
103
|
+
|
104
|
+
assert_equal 2, @stats.outliers_low
|
105
|
+
assert_equal 1, @stats.outliers_high
|
106
|
+
end
|
107
|
+
|
108
|
+
def test_std_dev
|
109
|
+
@stats.std_dev
|
99
110
|
end
|
100
111
|
end
|
101
112
|
|