to_histogram 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bucketizer.rb +19 -7
- data/lib/histogram.rb +7 -5
- data/lib/stdout_print.rb +37 -4
- data/lib/to_histogram.rb +3 -3
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aa1cb2265ca6164d3b0f7dd6292c1d499bfe0488
|
4
|
+
data.tar.gz: 7631a5d5fa1138dc8c606ea20796bd6728d8834b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e5c41cd8733f140379ce4eadd116bd217549faef6e89ab6934374bae82a708def16b361015678b9d8fb896c6d5564577dd9ab84e3618e98e02b7708e59416c19
|
7
|
+
data.tar.gz: 426f7996e9e59fcccff17e8fa7d479b550b2f4367cb9ef6cfef45081dd780f5580f48260c4bb55c39c140d2045106aa380993742682796792ffcae094ca77df4
|
data/lib/bucketizer.rb
CHANGED
@@ -2,25 +2,35 @@ module ToHistogram
|
|
2
2
|
|
3
3
|
class Bucketizer
|
4
4
|
|
5
|
-
def initialize(array, num_buckets)
|
5
|
+
def initialize(array, num_buckets, percentile)
|
6
6
|
@arr = array.sort
|
7
7
|
@num_buckets = num_buckets
|
8
|
-
@bucket_increments = get_bucket_increment
|
8
|
+
@bucket_increments = get_bucket_increment(percentile)
|
9
9
|
end
|
10
10
|
attr_reader :bucket_increments
|
11
11
|
|
12
12
|
def create_buckets
|
13
13
|
l_index = 0
|
14
|
-
next_bucket =
|
14
|
+
next_bucket = @bucket_increments
|
15
15
|
buckets = []
|
16
16
|
|
17
|
+
# Deal with the special case where we have elements equal to 0 and an increment size of 1 (count 0 as its own value and don't lump it in with 1)
|
18
|
+
if(@arr.count(0) > 0 && next_bucket == 1)
|
19
|
+
bucket_0 = []
|
20
|
+
@arr.count(0).times { bucket_0 << @arr.shift }
|
21
|
+
buckets << bucket_0
|
22
|
+
|
23
|
+
next_bucket += 1
|
24
|
+
end
|
25
|
+
|
26
|
+
# Iterate through the remainder of the list in the normal case
|
17
27
|
@arr.each_with_index do |e, i|
|
18
28
|
break if buckets.length == (@num_buckets - 1)
|
19
29
|
|
20
|
-
if
|
30
|
+
if (e >= next_bucket)
|
21
31
|
buckets << @arr[l_index..(i - 1)]
|
22
32
|
|
23
|
-
# Special case where all of the results fit into the first bucket
|
33
|
+
# Special case (here as well): all of the results fit into the first bucket
|
24
34
|
if buckets[0].length == @arr.length
|
25
35
|
l_index = (@arr.length)
|
26
36
|
break
|
@@ -40,13 +50,15 @@ module ToHistogram
|
|
40
50
|
end
|
41
51
|
|
42
52
|
private
|
43
|
-
def get_bucket_increment
|
53
|
+
def get_bucket_increment(percentile)
|
54
|
+
nth_percentile = percentile / 100.0
|
55
|
+
|
44
56
|
if(@arr.length == 0)
|
45
57
|
return 0
|
46
58
|
elsif(@arr.length <= @num_buckets)
|
47
59
|
increment = ((@arr[-1] - @arr[0]) / @num_buckets)
|
48
60
|
else
|
49
|
-
increment = ((@arr[(@arr.length *
|
61
|
+
increment = ((@arr[(@arr.length * nth_percentile).to_i - 1] - @arr[0]) / @num_buckets.to_f).ceil
|
50
62
|
end
|
51
63
|
|
52
64
|
return increment
|
data/lib/histogram.rb
CHANGED
@@ -5,12 +5,14 @@ module ToHistogram
|
|
5
5
|
class Histogram
|
6
6
|
include Enumerable
|
7
7
|
|
8
|
-
def initialize(array, num_buckets)
|
9
|
-
bucketizer
|
10
|
-
@buckets
|
11
|
-
@increments
|
8
|
+
def initialize(array, num_buckets, percentile)
|
9
|
+
bucketizer = Bucketizer.new(array, num_buckets, percentile)
|
10
|
+
@buckets = bucketizer.create_buckets
|
11
|
+
@increments = bucketizer.bucket_increments
|
12
|
+
@percentile = percentile
|
13
|
+
@num_buckets = num_buckets
|
12
14
|
end
|
13
|
-
attr_reader :increments
|
15
|
+
attr_reader :increments, :percentile, :num_buckets
|
14
16
|
|
15
17
|
def each(&block)
|
16
18
|
@buckets.each do |b|
|
data/lib/stdout_print.rb
CHANGED
@@ -1,16 +1,35 @@
|
|
1
1
|
module ToHistogram
|
2
2
|
|
3
3
|
class StdoutPrint
|
4
|
-
def initialize(histogram)
|
5
|
-
@histogram
|
4
|
+
def initialize(histogram, original_array)
|
5
|
+
@histogram = histogram
|
6
|
+
@original_array = original_array.sort
|
6
7
|
end
|
7
8
|
|
8
9
|
def invoke
|
10
|
+
print_header
|
11
|
+
|
12
|
+
print_body
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
def print_header
|
17
|
+
puts "\n**************************************************************"
|
18
|
+
puts "Results for #to_histogram(num_buckets: #{@histogram.num_buckets}, percentile: #{@histogram.percentile})"
|
19
|
+
puts "\n"
|
20
|
+
puts "Min Value: #{@original_array[0]}, Max Value: #{@original_array[-1]}"
|
21
|
+
puts "Mean: #{mean}, Median: #{median}, Mode: #{mode}"
|
22
|
+
puts "Histogram bucket sizes: #{@histogram.increments}"
|
23
|
+
puts "**************************************************************\n\n"
|
24
|
+
end
|
25
|
+
|
26
|
+
def print_body
|
9
27
|
total_data_value_length = (@histogram.map { |b| b.length }).reduce(:+)
|
10
|
-
printf("%-20s %-20s %-30s %-20s \n\n", "
|
28
|
+
printf("%-20s %-20s %-30s %-20s \n\n", "Range", "Frequency", " Percentage (out of #{total_data_value_length})", "Histogram (each * =~ 1%)")
|
11
29
|
|
12
30
|
@histogram.each_with_index do |b, i|
|
13
|
-
|
31
|
+
next_bucket = (@histogram[i + 1]) ? @histogram[i + 1][0] : b[-1]
|
32
|
+
range = "#{b[0]} to #{next_bucket}"
|
14
33
|
frequency = b.length
|
15
34
|
percentage = ((frequency.to_f / total_data_value_length) * 100)
|
16
35
|
stars = ''
|
@@ -27,5 +46,19 @@ module ToHistogram
|
|
27
46
|
end
|
28
47
|
end
|
29
48
|
|
49
|
+
def mean
|
50
|
+
@original_array.reduce(:+) / @original_array.length
|
51
|
+
end
|
52
|
+
|
53
|
+
def median
|
54
|
+
@original_array[@original_array.length / 2]
|
55
|
+
end
|
56
|
+
|
57
|
+
def mode
|
58
|
+
frequency = @original_array.inject(Hash.new(0)) { |h,v| h[v] += 1; h }
|
59
|
+
|
60
|
+
@original_array.max_by { |v| frequency[v] }
|
61
|
+
end
|
62
|
+
|
30
63
|
end
|
31
64
|
end
|
data/lib/to_histogram.rb
CHANGED
@@ -2,9 +2,9 @@ require_relative './histogram'
|
|
2
2
|
require_relative './stdout_print'
|
3
3
|
|
4
4
|
class Array
|
5
|
-
def to_histogram(num_buckets: 10)
|
6
|
-
histogram = ToHistogram::Histogram.new(self, num_buckets)
|
7
|
-
stdout_print = ToHistogram::StdoutPrint.new(histogram)
|
5
|
+
def to_histogram(num_buckets: 10, percentile: 100)
|
6
|
+
histogram = ToHistogram::Histogram.new(self, num_buckets, percentile)
|
7
|
+
stdout_print = ToHistogram::StdoutPrint.new(histogram, self)
|
8
8
|
|
9
9
|
stdout_print.invoke
|
10
10
|
|