to_histogram 1.0.8 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/averages.rb +15 -0
- data/lib/bucketizer.rb +19 -12
- data/lib/stdout_print.rb +18 -28
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d76599692e37ff86d3d2ebaa05aa54da8b9da7ab
|
4
|
+
data.tar.gz: 3149b591c776df617496904bb2066a8f8fd64d41
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dd0e9518fe1e6841687b91093acf793edbe89ec56b85b7778f3272a1b48146375893a4a3b08ff073b0efd9ed5d35d3356ad063d1aa5d1d702a408783f720bbf8
|
7
|
+
data.tar.gz: 609d746f2f3fe63a9a12b2db5480aaddaf17d9e66cbdd71f15b7829777c7f7d28766593e6024e8be57320cc13f4486b3a2daec52167ac2e5553a0de3154e5444
|
data/lib/averages.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Averages
|
2
|
+
def mean(array)
|
3
|
+
array.reduce(:+) / array.length
|
4
|
+
end
|
5
|
+
|
6
|
+
def median(array)
|
7
|
+
array[array.length / 2]
|
8
|
+
end
|
9
|
+
|
10
|
+
def mode(array)
|
11
|
+
frequency = array.inject(Hash.new(0)) { |h,v| h[v] += 1; h }
|
12
|
+
|
13
|
+
array.max_by { |v| frequency[v] }
|
14
|
+
end
|
15
|
+
end
|
data/lib/bucketizer.rb
CHANGED
@@ -6,20 +6,17 @@ module ToHistogram
|
|
6
6
|
@arr = array.sort
|
7
7
|
@num_buckets = num_buckets
|
8
8
|
@percentile =percentile
|
9
|
-
|
9
|
+
|
10
|
+
remove_elements_outside_of_percentile
|
11
|
+
@bucket_increments = get_bucket_increment
|
10
12
|
end
|
11
13
|
attr_reader :bucket_increments, :arr
|
12
14
|
|
13
15
|
def create_buckets
|
14
16
|
l_index = 0
|
15
|
-
next_bucket = @bucket_increments
|
17
|
+
next_bucket = get_initial_next_bucket(@bucket_increments)
|
16
18
|
buckets = []
|
17
19
|
|
18
|
-
# Remove any elements outside of the percentile
|
19
|
-
if(@percentile != 100)
|
20
|
-
@arr = @arr[0..(@arr.length * (@percentile / 100.0) - 1).to_i]
|
21
|
-
end
|
22
|
-
|
23
20
|
# Deal with the special case where we have elements that == 0 and an increment sizes of 1 (count 0 as a value and don't lump it in with 1)
|
24
21
|
if(@arr.count(0) > 0 && next_bucket == 1)
|
25
22
|
bucket_0 = []
|
@@ -56,15 +53,25 @@ module ToHistogram
|
|
56
53
|
end
|
57
54
|
|
58
55
|
private
|
59
|
-
def
|
60
|
-
|
56
|
+
def get_initial_next_bucket(increments)
|
57
|
+
if(@arr[0] != nil && @arr[0] < 0)
|
58
|
+
return (@arr[0] + increments)
|
59
|
+
else
|
60
|
+
return increments
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def remove_elements_outside_of_percentile
|
65
|
+
if(@percentile != 100)
|
66
|
+
@arr = @arr[0..(@arr.length * (@percentile / 100.0) - 1).to_i]
|
67
|
+
end
|
68
|
+
end
|
61
69
|
|
70
|
+
def get_bucket_increment
|
62
71
|
if(@arr.length == 0)
|
63
72
|
return 0
|
64
|
-
elsif(@arr.length <= @num_buckets)
|
65
|
-
increment = ((@arr[-1] - @arr[0]) / @num_buckets)
|
66
73
|
else
|
67
|
-
increment = ((@arr[
|
74
|
+
increment = ((@arr[-1] - @arr[0]) / @num_buckets.to_f).ceil
|
68
75
|
end
|
69
76
|
|
70
77
|
return increment
|
data/lib/stdout_print.rb
CHANGED
@@ -1,9 +1,14 @@
|
|
1
|
+
require_relative './averages'
|
2
|
+
|
1
3
|
module ToHistogram
|
2
4
|
|
3
5
|
class StdoutPrint
|
4
|
-
|
6
|
+
include Averages
|
7
|
+
|
8
|
+
def initialize(histogram, original_array, stdout=$stdout)
|
5
9
|
@histogram = histogram
|
6
10
|
@original_array = original_array.sort
|
11
|
+
@stdout = stdout
|
7
12
|
end
|
8
13
|
|
9
14
|
def invoke
|
@@ -14,25 +19,25 @@ module ToHistogram
|
|
14
19
|
|
15
20
|
private
|
16
21
|
def print_header
|
17
|
-
puts "\n**************************************************************"
|
18
|
-
puts "Results for #to_histogram(num_buckets: #{@histogram.num_buckets}, percentile: #{@histogram.percentile}, print_info: true)"
|
22
|
+
@stdout.puts "\n**************************************************************"
|
23
|
+
@stdout.puts "Results for #to_histogram(num_buckets: #{@histogram.num_buckets}, percentile: #{@histogram.percentile}, print_info: true)"
|
19
24
|
|
20
|
-
puts "\n"
|
25
|
+
@stdout.puts "\n"
|
21
26
|
|
22
27
|
percentile_info = (@histogram.percentile == 100) ? '' : "(Numbers limited to the #{@histogram.percentile}th percentile)"
|
23
|
-
puts "Data set used in this calculation #{percentile_info}"
|
24
|
-
puts "Data set Size: #{@histogram.arr.length} items"
|
25
|
-
puts "Min Value: #{@histogram.arr[0]}, Max Value: #{@histogram.arr[-1]}"
|
26
|
-
puts "Mean: #{mean(@histogram.arr)}, Median: #{median(@histogram.arr)}, Mode: #{mode(@histogram.arr)}"
|
27
|
-
puts "\n"
|
28
|
+
@stdout.puts "Data set used in this calculation #{percentile_info}"
|
29
|
+
@stdout.puts "Data set Size: #{@histogram.arr.length} items"
|
30
|
+
@stdout.puts "Min Value: #{@histogram.arr[0]}, Max Value: #{@histogram.arr[-1]}"
|
31
|
+
@stdout.puts "Mean: #{mean(@histogram.arr)}, Median: #{median(@histogram.arr)}, Mode: #{mode(@histogram.arr)}"
|
32
|
+
@stdout.puts "\n"
|
28
33
|
|
29
|
-
puts "Histogram bucket sizes: #{@histogram.increments}"
|
30
|
-
puts "**************************************************************\n\n"
|
34
|
+
@stdout.puts "Histogram bucket sizes: #{@histogram.increments}"
|
35
|
+
@stdout.puts "**************************************************************\n\n"
|
31
36
|
end
|
32
37
|
|
33
38
|
def print_body
|
34
39
|
total_data_value_length = (@histogram.map { |b| b.length }).reduce(:+)
|
35
|
-
printf("%-20s %-20s %-30s %-20s \n\n", "Range", "Frequency", " Percentage", "Histogram (each * =~ 1%)")
|
40
|
+
@stdout.printf("%-20s %-20s %-30s %-20s \n\n", "Range", "Frequency", " Percentage", "Histogram (each * =~ 1%)")
|
36
41
|
|
37
42
|
@histogram.each_with_index do |b, i|
|
38
43
|
next_bucket = (@histogram[i + 1]) ? @histogram[i + 1][0] : b[-1]
|
@@ -48,24 +53,9 @@ module ToHistogram
|
|
48
53
|
end
|
49
54
|
end
|
50
55
|
|
51
|
-
printf("%-20s | %-20s | %-30s | %-20s \n", range, frequency, ('%.4f' % percentage), stars)
|
52
|
-
last_value = b[-1]
|
56
|
+
@stdout.printf("%-20s | %-20s | %-30s | %-20s \n", range, frequency, ('%.4f' % percentage), stars)
|
53
57
|
end
|
54
58
|
end
|
55
59
|
|
56
|
-
def mean(array)
|
57
|
-
array.reduce(:+) / array.length
|
58
|
-
end
|
59
|
-
|
60
|
-
def median(array)
|
61
|
-
array[array.length / 2]
|
62
|
-
end
|
63
|
-
|
64
|
-
def mode(array)
|
65
|
-
frequency = array.inject(Hash.new(0)) { |h,v| h[v] += 1; h }
|
66
|
-
|
67
|
-
array.max_by { |v| frequency[v] }
|
68
|
-
end
|
69
|
-
|
70
60
|
end
|
71
61
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: to_histogram
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.8
|
4
|
+
version: 1.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Sykes
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-08 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: 'Adds #to_histogram to Array. Returns a histogram distribution object
|
14
14
|
from an Array and optionally prints detailed info to stdout.'
|
@@ -17,6 +17,7 @@ executables: []
|
|
17
17
|
extensions: []
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
|
+
- lib/averages.rb
|
20
21
|
- lib/bucketizer.rb
|
21
22
|
- lib/histogram.rb
|
22
23
|
- lib/stdout_print.rb
|