to_histogram 1.0.11 → 1.0.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/bucket.rb +15 -0
- data/lib/bucketizer.rb +16 -7
- data/lib/histogram.rb +11 -1
- data/lib/stdout_print.rb +9 -14
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 95f6c12298b9da0f4bf82a323b643cb61309bbd8
|
4
|
+
data.tar.gz: cb98da14fc867bb1a50d150c1c2dbb0e1317e997
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86bfa3f3d6f6de5033c1b6036096dc6c25cc1f0fe04dc195c9bcdb1e8d45b164ae7b467a7f3d856bdf72a404f91eb7326a6a263d3065d5077c71787312929c60
|
7
|
+
data.tar.gz: 9ab0280cf88d0f7fc71f19285a061baa5f733635e3f59efc599c98d61ec3075c0c315a981e40a0183770a954920706180314de7bef99ef44313ab04147c39b7c
|
data/lib/bucket.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module ToHistogram
|
2
|
+
class Bucket
|
3
|
+
|
4
|
+
def initialize(from, to, contents)
|
5
|
+
@from = from
|
6
|
+
@to = to
|
7
|
+
@contents = contents
|
8
|
+
end
|
9
|
+
attr_reader :from, :to, :contents
|
10
|
+
|
11
|
+
def inspect
|
12
|
+
puts "#{self.class}: @from: #{@from}, @to: #{@to}, @contents: #{@contents.class}"
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/bucketizer.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require_relative './bucket'
|
2
|
+
|
1
3
|
module ToHistogram
|
2
4
|
|
3
5
|
class Bucketizer
|
@@ -14,6 +16,7 @@ module ToHistogram
|
|
14
16
|
|
15
17
|
def create_buckets
|
16
18
|
l_index = 0
|
19
|
+
last_bucket = @arr[0]
|
17
20
|
next_bucket = get_initial_next_bucket(@bucket_width)
|
18
21
|
buckets = []
|
19
22
|
|
@@ -21,9 +24,10 @@ module ToHistogram
|
|
21
24
|
if(@arr.count(0) > 0 && next_bucket == 1)
|
22
25
|
bucket_0 = []
|
23
26
|
@arr.count(0).times { bucket_0 << @arr.shift }
|
24
|
-
buckets << bucket_0
|
27
|
+
buckets << Bucket.new(0, 0, bucket_0)
|
25
28
|
|
26
|
-
|
29
|
+
last_bucket = 1
|
30
|
+
next_bucket = 2
|
27
31
|
end
|
28
32
|
|
29
33
|
# Iterate through the remainder of the list in the normal case
|
@@ -31,20 +35,23 @@ module ToHistogram
|
|
31
35
|
break if buckets.length == (@num_buckets - 1)
|
32
36
|
|
33
37
|
if (e >= next_bucket)
|
34
|
-
buckets << @arr[l_index..(i - 1)]
|
38
|
+
buckets << Bucket.new(last_bucket, next_bucket - 1, @arr[l_index..(i - 1)])
|
35
39
|
|
36
40
|
# Special case here also where all of the results fit into the first bucket
|
37
|
-
if buckets[0].length == @arr.length
|
41
|
+
if buckets[0].contents.length == @arr.length
|
38
42
|
l_index = (@arr.length)
|
39
43
|
break
|
40
44
|
end
|
41
45
|
|
42
46
|
l_index = i
|
47
|
+
last_bucket = next_bucket
|
43
48
|
next_bucket += @bucket_width
|
44
49
|
|
45
50
|
# Add empty buckets until the next bucket is greater than the current l_index
|
46
51
|
while(next_bucket < @arr[l_index])
|
47
|
-
|
52
|
+
|
53
|
+
buckets << Bucket.new(last_bucket, next_bucket - 1, [])
|
54
|
+
last_bucket = next_bucket
|
48
55
|
next_bucket += @bucket_width
|
49
56
|
end
|
50
57
|
end
|
@@ -52,7 +59,7 @@ module ToHistogram
|
|
52
59
|
|
53
60
|
# Stuff the remainder into the last bucket
|
54
61
|
if(l_index <= (@arr.length - 1))
|
55
|
-
buckets << @arr[l_index..(@arr.length - 1)]
|
62
|
+
buckets << Bucket.new(last_bucket, next_bucket - 1, @arr[l_index..(@arr.length - 1)])
|
56
63
|
end
|
57
64
|
|
58
65
|
return buckets
|
@@ -62,8 +69,10 @@ module ToHistogram
|
|
62
69
|
def get_initial_next_bucket(increments)
|
63
70
|
if(@arr[0] != nil && @arr[0] < 0)
|
64
71
|
return (@arr[0] + increments)
|
65
|
-
|
72
|
+
elsif(@arr[0] == 0 || @arr[0] == nil)
|
66
73
|
return increments
|
74
|
+
else
|
75
|
+
return increments + @arr[0]
|
67
76
|
end
|
68
77
|
end
|
69
78
|
|
data/lib/histogram.rb
CHANGED
@@ -9,7 +9,7 @@ module ToHistogram
|
|
9
9
|
bucketizer = Bucketizer.new(array, num_buckets: num_buckets, bucket_width: bucket_width, percentile: percentile)
|
10
10
|
|
11
11
|
@buckets = bucketizer.create_buckets
|
12
|
-
@bucket_width
|
12
|
+
@bucket_width = bucketizer.bucket_width
|
13
13
|
@percentile = percentile
|
14
14
|
@num_buckets = num_buckets
|
15
15
|
end
|
@@ -29,6 +29,16 @@ module ToHistogram
|
|
29
29
|
return @buckets.length
|
30
30
|
end
|
31
31
|
|
32
|
+
def bucket_contents_length
|
33
|
+
@buckets.reduce(0) { |sum, x| sum + x.contents.length }
|
34
|
+
end
|
35
|
+
|
36
|
+
def bucket_contents_values
|
37
|
+
a = []
|
38
|
+
@buckets.map { |b| a << b.contents }
|
39
|
+
return a.flatten
|
40
|
+
end
|
41
|
+
|
32
42
|
def inspect
|
33
43
|
return "class: #{self.class} object_id: #{self.object_id}"
|
34
44
|
end
|
data/lib/stdout_print.rb
CHANGED
@@ -26,10 +26,10 @@ module ToHistogram
|
|
26
26
|
|
27
27
|
percentile_info = (@histogram.percentile == 100) ? '' : "(Numbers limited to the #{@histogram.percentile}th percentile)"
|
28
28
|
@stdout.puts "Data set used in this calculation #{percentile_info}"
|
29
|
-
@stdout.puts "Data set Size: #{@histogram.
|
29
|
+
@stdout.puts "Data set Size: #{@histogram.bucket_contents_length} items"
|
30
30
|
|
31
|
-
@stdout.puts "Min Value: #{@histogram[0][0]}, Max Value: #{@histogram[-1][-1]}"
|
32
|
-
@stdout.puts "Mean: #{mean(@histogram.
|
31
|
+
@stdout.puts "Min Value: #{@histogram[0].contents[0]}, Max Value: #{@histogram[-1].contents[-1]}"
|
32
|
+
@stdout.puts "Mean: #{mean(@histogram.bucket_contents_values)}, Median: #{median(@histogram.bucket_contents_values)}, Mode: #{mode(@histogram.bucket_contents_values)}"
|
33
33
|
@stdout.puts "\n"
|
34
34
|
|
35
35
|
@stdout.puts "Histogram bucket width: #{@histogram.bucket_width}"
|
@@ -37,28 +37,23 @@ module ToHistogram
|
|
37
37
|
end
|
38
38
|
|
39
39
|
def print_body
|
40
|
-
total_data_value_length = (@histogram.map { |b| b.length }).reduce(:+)
|
41
40
|
@stdout.printf("%-20s %-20s %-30s %-20s \n\n", "Range", "Frequency", " Percentage", "Histogram (each * =~ 1%)")
|
42
|
-
|
43
|
-
from = @histogram[0][0]
|
44
|
-
to = (from + @histogram.bucket_width - 1)
|
41
|
+
|
45
42
|
@histogram.each_with_index do |b, i|
|
46
43
|
#next_bucket = (@histogram[i + 1]) ? @histogram[i + 1][0] : b[-1]
|
47
|
-
range = "#{from} to #{to}"
|
48
|
-
frequency = b.length
|
49
|
-
percentage = ((frequency.to_f / total_data_value_length) * 100)
|
44
|
+
range = "#{@histogram[i].from} to #{@histogram[i].to}"
|
45
|
+
frequency = b.contents.length
|
46
|
+
percentage = ((frequency.to_f / @histogram.bucket_contents_length) * 100)
|
50
47
|
stars = ''
|
51
48
|
percentage.round.times { |x| stars << '*' }
|
52
49
|
|
53
50
|
if(i == (@histogram.length - 1))
|
54
|
-
if(b[-1] - b[0] != 0 && (b[-1] - b[0] > @histogram.bucket_width))
|
55
|
-
range = "> than #{b[0]}"
|
51
|
+
if(b.contents[-1] - b.contents[0] != 0 && (b.contents[-1] - b.contents[0] > @histogram.bucket_width))
|
52
|
+
range = "> than #{b.contents[0]}"
|
56
53
|
end
|
57
54
|
end
|
58
55
|
|
59
56
|
@stdout.printf("%-20s | %-20s | %-30s | %-20s \n", range, frequency, ('%.4f' % percentage), stars)
|
60
|
-
from = to + 1
|
61
|
-
to += @histogram.bucket_width
|
62
57
|
end
|
63
58
|
end
|
64
59
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: to_histogram
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.11
|
4
|
+
version: 1.0.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Sykes
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-10 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: 'Adds #to_histogram to Array. Returns a histogram distribution object
|
14
14
|
from an Array and optionally prints detailed info to stdout.'
|
@@ -18,6 +18,7 @@ extensions: []
|
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
20
|
- lib/averages.rb
|
21
|
+
- lib/bucket.rb
|
21
22
|
- lib/bucketizer.rb
|
22
23
|
- lib/histogram.rb
|
23
24
|
- lib/stdout_print.rb
|