to_histogram 1.0.4 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bc64fe5ed589d7281499abede70e628d1d185fff
4
- data.tar.gz: d0ed5fa267e3328cfd72fba7625ad500cfebc521
3
+ metadata.gz: aa1cb2265ca6164d3b0f7dd6292c1d499bfe0488
4
+ data.tar.gz: 7631a5d5fa1138dc8c606ea20796bd6728d8834b
5
5
  SHA512:
6
- metadata.gz: 8062a66b610ee50b52c3053c4fb514cdb8edb70226b51500df6918e72dca7ad7457ca37e2826c0c73a0d2bb299390b6ff7481883f77e9114fbb0bf70c9e25dea
7
- data.tar.gz: b4ad12cd120b69b86ce20ce032c03bb3e1c0c8112ac10f1783519ffe42bf176d3a0969cb47a0eeea24a869fc5145d20fe783ae4a949eed3fbd1e77e36b791b3c
6
+ metadata.gz: e5c41cd8733f140379ce4eadd116bd217549faef6e89ab6934374bae82a708def16b361015678b9d8fb896c6d5564577dd9ab84e3618e98e02b7708e59416c19
7
+ data.tar.gz: 426f7996e9e59fcccff17e8fa7d479b550b2f4367cb9ef6cfef45081dd780f5580f48260c4bb55c39c140d2045106aa380993742682796792ffcae094ca77df4
data/lib/bucketizer.rb CHANGED
@@ -2,25 +2,35 @@ module ToHistogram
2
2
 
3
3
  class Bucketizer
4
4
 
5
- def initialize(array, num_buckets)
5
+ def initialize(array, num_buckets, percentile)
6
6
  @arr = array.sort
7
7
  @num_buckets = num_buckets
8
- @bucket_increments = get_bucket_increment
8
+ @bucket_increments = get_bucket_increment(percentile)
9
9
  end
10
10
  attr_reader :bucket_increments
11
11
 
12
12
  def create_buckets
13
13
  l_index = 0
14
- next_bucket = (@bucket_increments == 1 && @arr[0] == 0) ? 0 : @bucket_increments
14
+ next_bucket = @bucket_increments
15
15
  buckets = []
16
16
 
17
+ # Deal with the special case where we have elements that == 0 and an increment size of 1 (count 0 as a value and don't lump it in with 1)
18
+ if(@arr.count(0) > 0 && next_bucket == 1)
19
+ bucket_0 = []
20
+ @arr.count(0).times { bucket_0 << @arr.shift }
21
+ buckets << bucket_0
22
+
23
+ next_bucket += 1
24
+ end
25
+
26
+ # Iterate through the remainder of the list in the normal case
17
27
  @arr.each_with_index do |e, i|
18
28
  break if buckets.length == (@num_buckets - 1)
19
29
 
20
- if !(e <= next_bucket)
30
+ if (e >= next_bucket)
21
31
  buckets << @arr[l_index..(i - 1)]
22
32
 
23
- # Special case where all of the results fit into the first bucket
33
+ # Special case here also where all of the results fit into the first bucket
24
34
  if buckets[0].length == @arr.length
25
35
  l_index = (@arr.length)
26
36
  break
@@ -40,13 +50,15 @@ module ToHistogram
40
50
  end
41
51
 
42
52
  private
43
- def get_bucket_increment
53
+ def get_bucket_increment(percentile)
54
+ nth_percentile = percentile / 100.0
55
+
44
56
  if(@arr.length == 0)
45
57
  return 0
46
58
  elsif(@arr.length <= @num_buckets)
47
59
  increment = ((@arr[-1] - @arr[0]) / @num_buckets)
48
60
  else
49
- increment = ((@arr[(@arr.length * 0.9).to_i - 1] - @arr[(@arr.length * 0.1).to_i - 1]) / @num_buckets.to_f).ceil
61
+ increment = ((@arr[(@arr.length * nth_percentile).to_i - 1] - @arr[0]) / @num_buckets.to_f).ceil
50
62
  end
51
63
 
52
64
  return increment
data/lib/histogram.rb CHANGED
@@ -5,12 +5,14 @@ module ToHistogram
5
5
  class Histogram
6
6
  include Enumerable
7
7
 
8
- def initialize(array, num_buckets)
9
- bucketizer = Bucketizer.new(array, num_buckets)
10
- @buckets = bucketizer.create_buckets
11
- @increments = bucketizer.bucket_increments
8
+ def initialize(array, num_buckets, percentile)
9
+ bucketizer = Bucketizer.new(array, num_buckets, percentile)
10
+ @buckets = bucketizer.create_buckets
11
+ @increments = bucketizer.bucket_increments
12
+ @percentile = percentile
13
+ @num_buckets = num_buckets
12
14
  end
13
- attr_reader :increments
15
+ attr_reader :increments, :percentile, :num_buckets
14
16
 
15
17
  def each(&block)
16
18
  @buckets.each do |b|
data/lib/stdout_print.rb CHANGED
@@ -1,16 +1,35 @@
1
1
  module ToHistogram
2
2
 
3
3
  class StdoutPrint
4
- def initialize(histogram)
5
- @histogram = histogram
4
+ def initialize(histogram, original_array)
5
+ @histogram = histogram
6
+ @original_array = original_array.sort
6
7
  end
7
8
 
8
9
  def invoke
10
+ print_header
11
+
12
+ print_body
13
+ end
14
+
15
+ private
16
+ def print_header
17
+ puts "\n**************************************************************"
18
+ puts "Results for #to_histogram(num_buckets: #{@histogram.num_buckets}, percentile: #{@histogram.percentile})"
19
+ puts "\n"
20
+ puts "Min Value: #{@original_array[0]}, Max Value: #{@original_array[-1]}"
21
+ puts "Mean: #{mean}, Median: #{median}, Mode: #{mode}"
22
+ puts "Histogram bucket sizes: #{@histogram.increments}"
23
+ puts "**************************************************************\n\n"
24
+ end
25
+
26
+ def print_body
9
27
  total_data_value_length = (@histogram.map { |b| b.length }).reduce(:+)
10
- printf("%-20s %-20s %-30s %-20s \n\n", "range", "frequency", " percentage (out of #{total_data_value_length})", "histogram (percetage)")
28
+ printf("%-20s %-20s %-30s %-20s \n\n", "Range", "Frequency", " Percentage (out of #{total_data_value_length})", "Histogram (each * =~ 1%)")
11
29
 
12
30
  @histogram.each_with_index do |b, i|
13
- range = "#{b[0]} to #{b[-1]}"
31
+ next_bucket = (@histogram[i + 1]) ? @histogram[i + 1][0] : b[-1]
32
+ range = "#{b[0]} to #{next_bucket}"
14
33
  frequency = b.length
15
34
  percentage = ((frequency.to_f / total_data_value_length) * 100)
16
35
  stars = ''
@@ -27,5 +46,19 @@ module ToHistogram
27
46
  end
28
47
  end
29
48
 
49
+ def mean
50
+ @original_array.reduce(:+) / @original_array.length
51
+ end
52
+
53
+ def median
54
+ @original_array[@original_array.length / 2]
55
+ end
56
+
57
+ def mode
58
+ frequency = @original_array.inject(Hash.new(0)) { |h,v| h[v] += 1; h }
59
+
60
+ @original_array.max_by { |v| frequency[v] }
61
+ end
62
+
30
63
  end
31
64
  end
data/lib/to_histogram.rb CHANGED
@@ -2,9 +2,9 @@ require_relative './histogram'
2
2
  require_relative './stdout_print'
3
3
 
4
4
  class Array
5
- def to_histogram(num_buckets: 10)
6
- histogram = ToHistogram::Histogram.new(self, num_buckets)
7
- stdout_print = ToHistogram::StdoutPrint.new(histogram)
5
+ def to_histogram(num_buckets: 10, percentile: 100)
6
+ histogram = ToHistogram::Histogram.new(self, num_buckets, percentile)
7
+ stdout_print = ToHistogram::StdoutPrint.new(histogram, self)
8
8
 
9
9
  stdout_print.invoke
10
10
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: to_histogram
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brett Sykes