to_histogram 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bc64fe5ed589d7281499abede70e628d1d185fff
4
- data.tar.gz: d0ed5fa267e3328cfd72fba7625ad500cfebc521
3
+ metadata.gz: aa1cb2265ca6164d3b0f7dd6292c1d499bfe0488
4
+ data.tar.gz: 7631a5d5fa1138dc8c606ea20796bd6728d8834b
5
5
  SHA512:
6
- metadata.gz: 8062a66b610ee50b52c3053c4fb514cdb8edb70226b51500df6918e72dca7ad7457ca37e2826c0c73a0d2bb299390b6ff7481883f77e9114fbb0bf70c9e25dea
7
- data.tar.gz: b4ad12cd120b69b86ce20ce032c03bb3e1c0c8112ac10f1783519ffe42bf176d3a0969cb47a0eeea24a869fc5145d20fe783ae4a949eed3fbd1e77e36b791b3c
6
+ metadata.gz: e5c41cd8733f140379ce4eadd116bd217549faef6e89ab6934374bae82a708def16b361015678b9d8fb896c6d5564577dd9ab84e3618e98e02b7708e59416c19
7
+ data.tar.gz: 426f7996e9e59fcccff17e8fa7d479b550b2f4367cb9ef6cfef45081dd780f5580f48260c4bb55c39c140d2045106aa380993742682796792ffcae094ca77df4
data/lib/bucketizer.rb CHANGED
@@ -2,25 +2,35 @@ module ToHistogram
2
2
 
3
3
  class Bucketizer
4
4
 
5
- def initialize(array, num_buckets)
5
+ def initialize(array, num_buckets, percentile)
6
6
  @arr = array.sort
7
7
  @num_buckets = num_buckets
8
- @bucket_increments = get_bucket_increment
8
+ @bucket_increments = get_bucket_increment(percentile)
9
9
  end
10
10
  attr_reader :bucket_increments
11
11
 
12
12
  def create_buckets
13
13
  l_index = 0
14
- next_bucket = (@bucket_increments == 1 && @arr[0] == 0) ? 0 : @bucket_increments
14
+ next_bucket = @bucket_increments
15
15
  buckets = []
16
16
 
17
+ # Deal with the special case where we have elements that == 0 and an increment size of 1 (count 0 as its own value and don't lump it in with 1)
18
+ if(@arr.count(0) > 0 && next_bucket == 1)
19
+ bucket_0 = []
20
+ @arr.count(0).times { bucket_0 << @arr.shift }
21
+ buckets << bucket_0
22
+
23
+ next_bucket += 1
24
+ end
25
+
26
+ # Iterate through the remainder of the list in the normal case
17
27
  @arr.each_with_index do |e, i|
18
28
  break if buckets.length == (@num_buckets - 1)
19
29
 
20
- if !(e <= next_bucket)
30
+ if (e >= next_bucket)
21
31
  buckets << @arr[l_index..(i - 1)]
22
32
 
23
- # Special case where all of the results fit into the first bucket
33
+ # Special case here also where all of the results fit into the first bucket
24
34
  if buckets[0].length == @arr.length
25
35
  l_index = (@arr.length)
26
36
  break
@@ -40,13 +50,15 @@ module ToHistogram
40
50
  end
41
51
 
42
52
  private
43
- def get_bucket_increment
53
+ def get_bucket_increment(percentile)
54
+ nth_percentile = percentile / 100.0
55
+
44
56
  if(@arr.length == 0)
45
57
  return 0
46
58
  elsif(@arr.length <= @num_buckets)
47
59
  increment = ((@arr[-1] - @arr[0]) / @num_buckets)
48
60
  else
49
- increment = ((@arr[(@arr.length * 0.9).to_i - 1] - @arr[(@arr.length * 0.1).to_i - 1]) / @num_buckets.to_f).ceil
61
+ increment = ((@arr[(@arr.length * nth_percentile).to_i - 1] - @arr[0]) / @num_buckets.to_f).ceil
50
62
  end
51
63
 
52
64
  return increment
data/lib/histogram.rb CHANGED
@@ -5,12 +5,14 @@ module ToHistogram
5
5
  class Histogram
6
6
  include Enumerable
7
7
 
8
- def initialize(array, num_buckets)
9
- bucketizer = Bucketizer.new(array, num_buckets)
10
- @buckets = bucketizer.create_buckets
11
- @increments = bucketizer.bucket_increments
8
+ def initialize(array, num_buckets, percentile)
9
+ bucketizer = Bucketizer.new(array, num_buckets, percentile)
10
+ @buckets = bucketizer.create_buckets
11
+ @increments = bucketizer.bucket_increments
12
+ @percentile = percentile
13
+ @num_buckets = num_buckets
12
14
  end
13
- attr_reader :increments
15
+ attr_reader :increments, :percentile, :num_buckets
14
16
 
15
17
  def each(&block)
16
18
  @buckets.each do |b|
data/lib/stdout_print.rb CHANGED
@@ -1,16 +1,35 @@
1
1
  module ToHistogram
2
2
 
3
3
  class StdoutPrint
4
- def initialize(histogram)
5
- @histogram = histogram
4
+ def initialize(histogram, original_array)
5
+ @histogram = histogram
6
+ @original_array = original_array.sort
6
7
  end
7
8
 
8
9
  def invoke
10
+ print_header
11
+
12
+ print_body
13
+ end
14
+
15
+ private
16
+ def print_header
17
+ puts "\n**************************************************************"
18
+ puts "Results for #to_histogram(num_buckets: #{@histogram.num_buckets}, percentile: #{@histogram.percentile})"
19
+ puts "\n"
20
+ puts "Min Value: #{@original_array[0]}, Max Value: #{@original_array[-1]}"
21
+ puts "Mean: #{mean}, Median: #{median}, Mode: #{mode}"
22
+ puts "Histogram bucket sizes: #{@histogram.increments}"
23
+ puts "**************************************************************\n\n"
24
+ end
25
+
26
+ def print_body
9
27
  total_data_value_length = (@histogram.map { |b| b.length }).reduce(:+)
10
- printf("%-20s %-20s %-30s %-20s \n\n", "range", "frequency", " percentage (out of #{total_data_value_length})", "histogram (percetage)")
28
+ printf("%-20s %-20s %-30s %-20s \n\n", "Range", "Frequency", " Percentage (out of #{total_data_value_length})", "Histogram (each * =~ 1%)")
11
29
 
12
30
  @histogram.each_with_index do |b, i|
13
- range = "#{b[0]} to #{b[-1]}"
31
+ next_bucket = (@histogram[i + 1]) ? @histogram[i + 1][0] : b[-1]
32
+ range = "#{b[0]} to #{next_bucket}"
14
33
  frequency = b.length
15
34
  percentage = ((frequency.to_f / total_data_value_length) * 100)
16
35
  stars = ''
@@ -27,5 +46,19 @@ module ToHistogram
27
46
  end
28
47
  end
29
48
 
49
+ def mean
50
+ @original_array.reduce(:+) / @original_array.length
51
+ end
52
+
53
+ def median
54
+ @original_array[@original_array.length / 2]
55
+ end
56
+
57
+ def mode
58
+ frequency = @original_array.inject(Hash.new(0)) { |h,v| h[v] += 1; h }
59
+
60
+ @original_array.max_by { |v| frequency[v] }
61
+ end
62
+
30
63
  end
31
64
  end
data/lib/to_histogram.rb CHANGED
@@ -2,9 +2,9 @@ require_relative './histogram'
2
2
  require_relative './stdout_print'
3
3
 
4
4
  class Array
5
- def to_histogram(num_buckets: 10)
6
- histogram = ToHistogram::Histogram.new(self, num_buckets)
7
- stdout_print = ToHistogram::StdoutPrint.new(histogram)
5
+ def to_histogram(num_buckets: 10, percentile: 100)
6
+ histogram = ToHistogram::Histogram.new(self, num_buckets, percentile)
7
+ stdout_print = ToHistogram::StdoutPrint.new(histogram, self)
8
8
 
9
9
  stdout_print.invoke
10
10
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: to_histogram
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brett Sykes