to_histogram 1.0.4 → 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/bucketizer.rb +19 -7
- data/lib/histogram.rb +7 -5
- data/lib/stdout_print.rb +37 -4
- data/lib/to_histogram.rb +3 -3
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aa1cb2265ca6164d3b0f7dd6292c1d499bfe0488
|
4
|
+
data.tar.gz: 7631a5d5fa1138dc8c606ea20796bd6728d8834b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e5c41cd8733f140379ce4eadd116bd217549faef6e89ab6934374bae82a708def16b361015678b9d8fb896c6d5564577dd9ab84e3618e98e02b7708e59416c19
|
7
|
+
data.tar.gz: 426f7996e9e59fcccff17e8fa7d479b550b2f4367cb9ef6cfef45081dd780f5580f48260c4bb55c39c140d2045106aa380993742682796792ffcae094ca77df4
|
data/lib/bucketizer.rb
CHANGED
@@ -2,25 +2,35 @@ module ToHistogram
|
|
2
2
|
|
3
3
|
class Bucketizer
|
4
4
|
|
5
|
-
def initialize(array, num_buckets)
|
5
|
+
def initialize(array, num_buckets, percentile)
|
6
6
|
@arr = array.sort
|
7
7
|
@num_buckets = num_buckets
|
8
|
-
@bucket_increments = get_bucket_increment
|
8
|
+
@bucket_increments = get_bucket_increment(percentile)
|
9
9
|
end
|
10
10
|
attr_reader :bucket_increments
|
11
11
|
|
12
12
|
def create_buckets
|
13
13
|
l_index = 0
|
14
|
-
next_bucket =
|
14
|
+
next_bucket = @bucket_increments
|
15
15
|
buckets = []
|
16
16
|
|
17
|
+
# Deal with the special case where we have elements that == 0 and an increment size of 1 (count 0 as a value and don't lump it in with 1)
|
18
|
+
if(@arr.count(0) > 0 && next_bucket == 1)
|
19
|
+
bucket_0 = []
|
20
|
+
@arr.count(0).times { bucket_0 << @arr.shift }
|
21
|
+
buckets << bucket_0
|
22
|
+
|
23
|
+
next_bucket += 1
|
24
|
+
end
|
25
|
+
|
26
|
+
# Iterate through the remainder of the list in the normal case
|
17
27
|
@arr.each_with_index do |e, i|
|
18
28
|
break if buckets.length == (@num_buckets - 1)
|
19
29
|
|
20
|
-
if
|
30
|
+
if (e >= next_bucket)
|
21
31
|
buckets << @arr[l_index..(i - 1)]
|
22
32
|
|
23
|
-
# Special case where all of the results fit into the first bucket
|
33
|
+
# Special case here also where all of the results fit into the first bucket
|
24
34
|
if buckets[0].length == @arr.length
|
25
35
|
l_index = (@arr.length)
|
26
36
|
break
|
@@ -40,13 +50,15 @@ module ToHistogram
|
|
40
50
|
end
|
41
51
|
|
42
52
|
private
|
43
|
-
def get_bucket_increment
|
53
|
+
def get_bucket_increment(percentile)
|
54
|
+
nth_percentile = percentile / 100.0
|
55
|
+
|
44
56
|
if(@arr.length == 0)
|
45
57
|
return 0
|
46
58
|
elsif(@arr.length <= @num_buckets)
|
47
59
|
increment = ((@arr[-1] - @arr[0]) / @num_buckets)
|
48
60
|
else
|
49
|
-
increment = ((@arr[(@arr.length *
|
61
|
+
increment = ((@arr[(@arr.length * nth_percentile).to_i - 1] - @arr[0]) / @num_buckets.to_f).ceil
|
50
62
|
end
|
51
63
|
|
52
64
|
return increment
|
data/lib/histogram.rb
CHANGED
@@ -5,12 +5,14 @@ module ToHistogram
|
|
5
5
|
class Histogram
|
6
6
|
include Enumerable
|
7
7
|
|
8
|
-
def initialize(array, num_buckets)
|
9
|
-
bucketizer
|
10
|
-
@buckets
|
11
|
-
@increments
|
8
|
+
def initialize(array, num_buckets, percentile)
|
9
|
+
bucketizer = Bucketizer.new(array, num_buckets, percentile)
|
10
|
+
@buckets = bucketizer.create_buckets
|
11
|
+
@increments = bucketizer.bucket_increments
|
12
|
+
@percentile = percentile
|
13
|
+
@num_buckets = num_buckets
|
12
14
|
end
|
13
|
-
attr_reader :increments
|
15
|
+
attr_reader :increments, :percentile, :num_buckets
|
14
16
|
|
15
17
|
def each(&block)
|
16
18
|
@buckets.each do |b|
|
data/lib/stdout_print.rb
CHANGED
@@ -1,16 +1,35 @@
|
|
1
1
|
module ToHistogram
|
2
2
|
|
3
3
|
class StdoutPrint
|
4
|
-
def initialize(histogram)
|
5
|
-
@histogram
|
4
|
+
def initialize(histogram, original_array)
|
5
|
+
@histogram = histogram
|
6
|
+
@original_array = original_array.sort
|
6
7
|
end
|
7
8
|
|
8
9
|
def invoke
|
10
|
+
print_header
|
11
|
+
|
12
|
+
print_body
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
def print_header
|
17
|
+
puts "\n**************************************************************"
|
18
|
+
puts "Results for #to_histogram(num_buckets: #{@histogram.num_buckets}, percentile: #{@histogram.percentile})"
|
19
|
+
puts "\n"
|
20
|
+
puts "Min Value: #{@original_array[0]}, Max Value: #{@original_array[-1]}"
|
21
|
+
puts "Mean: #{mean}, Median: #{median}, Mode: #{mode}"
|
22
|
+
puts "Histogram bucket sizes: #{@histogram.increments}"
|
23
|
+
puts "**************************************************************\n\n"
|
24
|
+
end
|
25
|
+
|
26
|
+
def print_body
|
9
27
|
total_data_value_length = (@histogram.map { |b| b.length }).reduce(:+)
|
10
|
-
printf("%-20s %-20s %-30s %-20s \n\n", "
|
28
|
+
printf("%-20s %-20s %-30s %-20s \n\n", "Range", "Frequency", " Percentage (out of #{total_data_value_length})", "Histogram (each * =~ 1%)")
|
11
29
|
|
12
30
|
@histogram.each_with_index do |b, i|
|
13
|
-
|
31
|
+
next_bucket = (@histogram[i + 1]) ? @histogram[i + 1][0] : b[-1]
|
32
|
+
range = "#{b[0]} to #{next_bucket}"
|
14
33
|
frequency = b.length
|
15
34
|
percentage = ((frequency.to_f / total_data_value_length) * 100)
|
16
35
|
stars = ''
|
@@ -27,5 +46,19 @@ module ToHistogram
|
|
27
46
|
end
|
28
47
|
end
|
29
48
|
|
49
|
+
def mean
|
50
|
+
@original_array.reduce(:+) / @original_array.length
|
51
|
+
end
|
52
|
+
|
53
|
+
def median
|
54
|
+
@original_array[@original_array.length / 2]
|
55
|
+
end
|
56
|
+
|
57
|
+
def mode
|
58
|
+
frequency = @original_array.inject(Hash.new(0)) { |h,v| h[v] += 1; h }
|
59
|
+
|
60
|
+
@original_array.max_by { |v| frequency[v] }
|
61
|
+
end
|
62
|
+
|
30
63
|
end
|
31
64
|
end
|
data/lib/to_histogram.rb
CHANGED
@@ -2,9 +2,9 @@ require_relative './histogram'
|
|
2
2
|
require_relative './stdout_print'
|
3
3
|
|
4
4
|
class Array
|
5
|
-
def to_histogram(num_buckets: 10)
|
6
|
-
histogram = ToHistogram::Histogram.new(self, num_buckets)
|
7
|
-
stdout_print = ToHistogram::StdoutPrint.new(histogram)
|
5
|
+
def to_histogram(num_buckets: 10, percentile: 100)
|
6
|
+
histogram = ToHistogram::Histogram.new(self, num_buckets, percentile)
|
7
|
+
stdout_print = ToHistogram::StdoutPrint.new(histogram, self)
|
8
8
|
|
9
9
|
stdout_print.invoke
|
10
10
|
|