to_histogram 1.0.11 → 1.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bucket.rb +15 -0
- data/lib/bucketizer.rb +16 -7
- data/lib/histogram.rb +11 -1
- data/lib/stdout_print.rb +9 -14
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 95f6c12298b9da0f4bf82a323b643cb61309bbd8
|
4
|
+
data.tar.gz: cb98da14fc867bb1a50d150c1c2dbb0e1317e997
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86bfa3f3d6f6de5033c1b6036096dc6c25cc1f0fe04dc195c9bcdb1e8d45b164ae7b467a7f3d856bdf72a404f91eb7326a6a263d3065d5077c71787312929c60
|
7
|
+
data.tar.gz: 9ab0280cf88d0f7fc71f19285a061baa5f733635e3f59efc599c98d61ec3075c0c315a981e40a0183770a954920706180314de7bef99ef44313ab04147c39b7c
|
data/lib/bucket.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module ToHistogram
|
2
|
+
class Bucket
|
3
|
+
|
4
|
+
def initialize(from, to, contents)
|
5
|
+
@from = from
|
6
|
+
@to = to
|
7
|
+
@contents = contents
|
8
|
+
end
|
9
|
+
attr_reader :from, :to, :contents
|
10
|
+
|
11
|
+
def inspect
|
12
|
+
puts "#{self.class}: @from: #{@from}, @to: #{@to}, @contents: #{@contents.class}"
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/bucketizer.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require_relative './bucket'
|
2
|
+
|
1
3
|
module ToHistogram
|
2
4
|
|
3
5
|
class Bucketizer
|
@@ -14,6 +16,7 @@ module ToHistogram
|
|
14
16
|
|
15
17
|
def create_buckets
|
16
18
|
l_index = 0
|
19
|
+
last_bucket = @arr[0]
|
17
20
|
next_bucket = get_initial_next_bucket(@bucket_width)
|
18
21
|
buckets = []
|
19
22
|
|
@@ -21,9 +24,10 @@ module ToHistogram
|
|
21
24
|
if(@arr.count(0) > 0 && next_bucket == 1)
|
22
25
|
bucket_0 = []
|
23
26
|
@arr.count(0).times { bucket_0 << @arr.shift }
|
24
|
-
buckets << bucket_0
|
27
|
+
buckets << Bucket.new(0, 0, bucket_0)
|
25
28
|
|
26
|
-
|
29
|
+
last_bucket = 1
|
30
|
+
next_bucket = 2
|
27
31
|
end
|
28
32
|
|
29
33
|
# Iterate thorough the remainder of the list in the normal case
|
@@ -31,20 +35,23 @@ module ToHistogram
|
|
31
35
|
break if buckets.length == (@num_buckets - 1)
|
32
36
|
|
33
37
|
if (e >= next_bucket)
|
34
|
-
buckets << @arr[l_index..(i - 1)]
|
38
|
+
buckets << Bucket.new(last_bucket, next_bucket - 1, @arr[l_index..(i - 1)])
|
35
39
|
|
36
40
|
# Special case here also where all of the results fit into the first bucket
|
37
|
-
if buckets[0].length == @arr.length
|
41
|
+
if buckets[0].contents.length == @arr.length
|
38
42
|
l_index = (@arr.length)
|
39
43
|
break
|
40
44
|
end
|
41
45
|
|
42
46
|
l_index = i
|
47
|
+
last_bucket = next_bucket
|
43
48
|
next_bucket += @bucket_width
|
44
49
|
|
45
50
|
# Add empty buckets until the next bucket is greater than the current l_index
|
46
51
|
while(next_bucket < @arr[l_index])
|
47
|
-
|
52
|
+
|
53
|
+
buckets << Bucket.new(last_bucket, next_bucket - 1, [])
|
54
|
+
last_bucket = next_bucket
|
48
55
|
next_bucket += @bucket_width
|
49
56
|
end
|
50
57
|
end
|
@@ -52,7 +59,7 @@ module ToHistogram
|
|
52
59
|
|
53
60
|
# Stuff the remainder into the last bucket
|
54
61
|
if(l_index <= (@arr.length - 1))
|
55
|
-
buckets << @arr[l_index..(@arr.length - 1)]
|
62
|
+
buckets << Bucket.new(last_bucket, next_bucket - 1, @arr[l_index..(@arr.length - 1)])
|
56
63
|
end
|
57
64
|
|
58
65
|
return buckets
|
@@ -62,8 +69,10 @@ module ToHistogram
|
|
62
69
|
def get_initial_next_bucket(increments)
|
63
70
|
if(@arr[0] != nil && @arr[0] < 0)
|
64
71
|
return (@arr[0] + increments)
|
65
|
-
|
72
|
+
elsif(@arr[0] == 0 || @arr[0] == nil)
|
66
73
|
return increments
|
74
|
+
else
|
75
|
+
return increments + @arr[0]
|
67
76
|
end
|
68
77
|
end
|
69
78
|
|
data/lib/histogram.rb
CHANGED
@@ -9,7 +9,7 @@ module ToHistogram
|
|
9
9
|
bucketizer = Bucketizer.new(array, num_buckets: num_buckets, bucket_width: bucket_width, percentile: percentile)
|
10
10
|
|
11
11
|
@buckets = bucketizer.create_buckets
|
12
|
-
@bucket_width
|
12
|
+
@bucket_width = bucketizer.bucket_width
|
13
13
|
@percentile = percentile
|
14
14
|
@num_buckets = num_buckets
|
15
15
|
end
|
@@ -29,6 +29,16 @@ module ToHistogram
|
|
29
29
|
return @buckets.length
|
30
30
|
end
|
31
31
|
|
32
|
+
def bucket_contents_length
|
33
|
+
@buckets.reduce(0) { |sum, x| sum + x.contents.length }
|
34
|
+
end
|
35
|
+
|
36
|
+
def bucket_contents_values
|
37
|
+
a = []
|
38
|
+
@buckets.map { |b| a << b.contents }
|
39
|
+
return a.flatten
|
40
|
+
end
|
41
|
+
|
32
42
|
def inspect
|
33
43
|
return "class: #{self.class} object_id: #{self.object_id}"
|
34
44
|
end
|
data/lib/stdout_print.rb
CHANGED
@@ -26,10 +26,10 @@ module ToHistogram
|
|
26
26
|
|
27
27
|
percentile_info = (@histogram.percentile == 100) ? '' : "(Numbers limited to the #{@histogram.percentile}th percentile)"
|
28
28
|
@stdout.puts "Data set used in this calculation #{percentile_info}"
|
29
|
-
@stdout.puts "Data set Size: #{@histogram.
|
29
|
+
@stdout.puts "Data set Size: #{@histogram.bucket_contents_length} items"
|
30
30
|
|
31
|
-
@stdout.puts "Min Value: #{@histogram[0][0]}, Max Value: #{@histogram[-1][-1]}"
|
32
|
-
@stdout.puts "Mean: #{mean(@histogram.
|
31
|
+
@stdout.puts "Min Value: #{@histogram[0].contents[0]}, Max Value: #{@histogram[-1].contents[-1]}"
|
32
|
+
@stdout.puts "Mean: #{mean(@histogram.bucket_contents_values)}, Median: #{median(@histogram.bucket_contents_values)}, Mode: #{mode(@histogram.bucket_contents_values)}"
|
33
33
|
@stdout.puts "\n"
|
34
34
|
|
35
35
|
@stdout.puts "Histogram bucket width: #{@histogram.bucket_width}"
|
@@ -37,28 +37,23 @@ module ToHistogram
|
|
37
37
|
end
|
38
38
|
|
39
39
|
def print_body
|
40
|
-
total_data_value_length = (@histogram.map { |b| b.length }).reduce(:+)
|
41
40
|
@stdout.printf("%-20s %-20s %-30s %-20s \n\n", "Range", "Frequency", " Percentage", "Histogram (each * =~ 1%)")
|
42
|
-
|
43
|
-
from = @histogram[0][0]
|
44
|
-
to = (from + @histogram.bucket_width - 1)
|
41
|
+
|
45
42
|
@histogram.each_with_index do |b, i|
|
46
43
|
#next_bucket = (@histogram[i + 1]) ? @histogram[i + 1][0] : b[-1]
|
47
|
-
range = "#{from} to #{to}"
|
48
|
-
frequency = b.length
|
49
|
-
percentage = ((frequency.to_f / total_data_value_length) * 100)
|
44
|
+
range = "#{@histogram[i].from} to #{@histogram[i].to}"
|
45
|
+
frequency = b.contents.length
|
46
|
+
percentage = ((frequency.to_f / @histogram.bucket_contents_length) * 100)
|
50
47
|
stars = ''
|
51
48
|
percentage.round.times { |x| stars << '*' }
|
52
49
|
|
53
50
|
if(i == (@histogram.length - 1))
|
54
|
-
if(b[-1] - b[0] != 0 && (b[-1] - b[0] > @histogram.bucket_width))
|
55
|
-
range = "> than #{b[0]}"
|
51
|
+
if(b.contents[-1] - b.contents[0] != 0 && (b.contents[-1] - b.contents[0] > @histogram.bucket_width))
|
52
|
+
range = "> than #{b.contents[0]}"
|
56
53
|
end
|
57
54
|
end
|
58
55
|
|
59
56
|
@stdout.printf("%-20s | %-20s | %-30s | %-20s \n", range, frequency, ('%.4f' % percentage), stars)
|
60
|
-
from = to + 1
|
61
|
-
to += @histogram.bucket_width
|
62
57
|
end
|
63
58
|
end
|
64
59
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: to_histogram
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.11
|
4
|
+
version: 1.0.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Sykes
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-10 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: 'Adds #to_histogram to Array. Returns a histogram distribution object
|
14
14
|
from an Array and optionally prints detailed info to stdout.'
|
@@ -18,6 +18,7 @@ extensions: []
|
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
20
|
- lib/averages.rb
|
21
|
+
- lib/bucket.rb
|
21
22
|
- lib/bucketizer.rb
|
22
23
|
- lib/histogram.rb
|
23
24
|
- lib/stdout_print.rb
|