to_histogram 1.0.8 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9601f9e6c6772c4cf1d71f2963caa864ddafbfd7
4
- data.tar.gz: dc6eaf01aae093f8ca0213fc561a876e015efdc5
3
+ metadata.gz: d76599692e37ff86d3d2ebaa05aa54da8b9da7ab
4
+ data.tar.gz: 3149b591c776df617496904bb2066a8f8fd64d41
5
5
  SHA512:
6
- metadata.gz: 07b9ee278e369bcc7d1349094b0d99d05e2db082a87d4fde855dca8a447830a2731f96b475be81dfa6954d262bdb76e3239da591e3d29cb5ee12b44606980fa4
7
- data.tar.gz: 9328e8cb97e0dfebbc213f4eebb1a7c18cbc78e6218380b19b57e8d20946d50db88665b8e613c4d7266a103740e4255aae2b93eee54cfa288ee81025cba43925
6
+ metadata.gz: dd0e9518fe1e6841687b91093acf793edbe89ec56b85b7778f3272a1b48146375893a4a3b08ff073b0efd9ed5d35d3356ad063d1aa5d1d702a408783f720bbf8
7
+ data.tar.gz: 609d746f2f3fe63a9a12b2db5480aaddaf17d9e66cbdd71f15b7829777c7f7d28766593e6024e8be57320cc13f4486b3a2daec52167ac2e5553a0de3154e5444
data/lib/averages.rb ADDED
@@ -0,0 +1,15 @@
1
# Basic descriptive statistics over arrays of numbers.
#
# Meant to be mixed into a class with `include Averages`. None of the
# methods mutate the array they are given.
module Averages
  # Arithmetic mean of +array+.
  #
  # @param array [Array<Numeric>] values to average
  # @return [Float, nil] the mean, or nil when +array+ is empty
  def mean(array)
    return nil if array.empty?

    # Divide by a Float so integer input is not truncated ([1, 2] => 1.5, not 1).
    array.reduce(:+) / array.length.to_f
  end

  # Median of +array+. The input does not need to be sorted.
  #
  # @param array [Array<Numeric>] values to inspect
  # @return [Numeric, nil] the middle element for an odd-sized array, the
  #   average of the two middle elements (as a Float) for an even-sized
  #   array, or nil when +array+ is empty
  def median(array)
    return nil if array.empty?

    # Sort a copy: the positional lookup below is only meaningful on ordered data.
    sorted = array.sort
    middle = sorted.length / 2
    return sorted[middle] if sorted.length.odd?

    (sorted[middle - 1] + sorted[middle]) / 2.0
  end

  # Most frequently occurring value in +array+.
  #
  # @param array [Array] values to inspect
  # @return [Object, nil] a value with the highest occurrence count (when
  #   several values tie, the choice is left to Enumerable#max_by), or nil
  #   when +array+ is empty
  def mode(array)
    frequency = array.each_with_object(Hash.new(0)) { |value, counts| counts[value] += 1 }

    array.max_by { |value| frequency[value] }
  end
end
data/lib/bucketizer.rb CHANGED
@@ -6,20 +6,17 @@ module ToHistogram
6
6
  @arr = array.sort
7
7
  @num_buckets = num_buckets
8
8
  @percentile =percentile
9
- @bucket_increments = get_bucket_increment(percentile)
9
+
10
+ remove_elements_outside_of_percentile
11
+ @bucket_increments = get_bucket_increment
10
12
  end
11
13
  attr_reader :bucket_increments, :arr
12
14
 
13
15
  def create_buckets
14
16
  l_index = 0
15
- next_bucket = @bucket_increments
17
+ next_bucket = get_initial_next_bucket(@bucket_increments)
16
18
  buckets = []
17
19
 
18
- # Remove any elements outside of the percentile
19
- if(@percentile != 100)
20
- @arr = @arr[0..(@arr.length * (@percentile / 100.0) - 1).to_i]
21
- end
22
-
23
20
  # Deal with the special case where we have elements that == 0 and an increment size of 1 (count 0 as a value and don't lump it in with 1)
24
21
  if(@arr.count(0) > 0 && next_bucket == 1)
25
22
  bucket_0 = []
@@ -56,15 +53,25 @@ module ToHistogram
56
53
  end
57
54
 
58
55
  private
59
- def get_bucket_increment(percentile)
60
- nth_percentile = percentile / 100.0
56
# Upper boundary used for the first bucket.
#
# Reads the first element of @arr. When that element is negative the
# boundary is offset from it (first element + increments); otherwise,
# including when @arr is empty, the boundary is just +increments+.
#
# @param increments [Numeric] bucket width
# @return [Numeric] the first bucket boundary
def get_initial_next_bucket(increments)
  smallest = @arr.first
  return increments unless smallest && smallest < 0

  smallest + increments
end
63
+
64
# Trims @arr down to the leading share of elements given by @percentile.
#
# Nothing happens when @percentile is 100. Otherwise @arr is replaced by a
# new array holding its first `keep` elements.
#
# @return [Array, nil] the trimmed array, or nil when nothing was trimmed
def remove_elements_outside_of_percentile
  return if @percentile == 100

  # Multiply before dividing: `length * (percentile / 100.0)` rounds twice and
  # can land just below a whole number (100 * 0.29 => 28.999999999999996),
  # which silently drops one element too many.
  last_index = (@arr.length * @percentile / 100.0 - 1).to_i

  # Work with a count rather than `@arr[0..last_index]`: an index of -1
  # (percentile 0) would otherwise select the entire array.
  keep = [last_index + 1, 0].max
  @arr = @arr.first(keep)
end
61
69
 
70
+ def get_bucket_increment
62
71
  if(@arr.length == 0)
63
72
  return 0
64
- elsif(@arr.length <= @num_buckets)
65
- increment = ((@arr[-1] - @arr[0]) / @num_buckets)
66
73
  else
67
- increment = ((@arr[(@arr.length * nth_percentile).to_i - 1] - @arr[0]) / @num_buckets.to_f).ceil
74
+ increment = ((@arr[-1] - @arr[0]) / @num_buckets.to_f).ceil
68
75
  end
69
76
 
70
77
  return increment
data/lib/stdout_print.rb CHANGED
@@ -1,9 +1,14 @@
1
+ require_relative './averages'
2
+
1
3
  module ToHistogram
2
4
 
3
5
  class StdoutPrint
4
- def initialize(histogram, original_array)
6
+ include Averages
7
+
8
+ def initialize(histogram, original_array, stdout=$stdout)
5
9
  @histogram = histogram
6
10
  @original_array = original_array.sort
11
+ @stdout = stdout
7
12
  end
8
13
 
9
14
  def invoke
@@ -14,25 +19,25 @@ module ToHistogram
14
19
 
15
20
  private
16
21
  def print_header
17
- puts "\n**************************************************************"
18
- puts "Results for #to_histogram(num_buckets: #{@histogram.num_buckets}, percentile: #{@histogram.percentile}, print_info: true)"
22
+ @stdout.puts "\n**************************************************************"
23
+ @stdout.puts "Results for #to_histogram(num_buckets: #{@histogram.num_buckets}, percentile: #{@histogram.percentile}, print_info: true)"
19
24
 
20
- puts "\n"
25
+ @stdout.puts "\n"
21
26
 
22
27
  percentile_info = (@histogram.percentile == 100) ? '' : "(Numbers limited to the #{@histogram.percentile}th percentile)"
23
- puts "Data set used in this calculation #{percentile_info}"
24
- puts "Data set Size: #{@histogram.arr.length} items"
25
- puts "Min Value: #{@histogram.arr[0]}, Max Value: #{@histogram.arr[-1]}"
26
- puts "Mean: #{mean(@histogram.arr)}, Median: #{median(@histogram.arr)}, Mode: #{mode(@histogram.arr)}"
27
- puts "\n"
28
+ @stdout.puts "Data set used in this calculation #{percentile_info}"
29
+ @stdout.puts "Data set Size: #{@histogram.arr.length} items"
30
+ @stdout.puts "Min Value: #{@histogram.arr[0]}, Max Value: #{@histogram.arr[-1]}"
31
+ @stdout.puts "Mean: #{mean(@histogram.arr)}, Median: #{median(@histogram.arr)}, Mode: #{mode(@histogram.arr)}"
32
+ @stdout.puts "\n"
28
33
 
29
- puts "Histogram bucket sizes: #{@histogram.increments}"
30
- puts "**************************************************************\n\n"
34
+ @stdout.puts "Histogram bucket sizes: #{@histogram.increments}"
35
+ @stdout.puts "**************************************************************\n\n"
31
36
  end
32
37
 
33
38
  def print_body
34
39
  total_data_value_length = (@histogram.map { |b| b.length }).reduce(:+)
35
- printf("%-20s %-20s %-30s %-20s \n\n", "Range", "Frequency", " Percentage", "Histogram (each * =~ 1%)")
40
+ @stdout.printf("%-20s %-20s %-30s %-20s \n\n", "Range", "Frequency", " Percentage", "Histogram (each * =~ 1%)")
36
41
 
37
42
  @histogram.each_with_index do |b, i|
38
43
  next_bucket = (@histogram[i + 1]) ? @histogram[i + 1][0] : b[-1]
@@ -48,24 +53,9 @@ module ToHistogram
48
53
  end
49
54
  end
50
55
 
51
- printf("%-20s | %-20s | %-30s | %-20s \n", range, frequency, ('%.4f' % percentage), stars)
52
- last_value = b[-1]
56
+ @stdout.printf("%-20s | %-20s | %-30s | %-20s \n", range, frequency, ('%.4f' % percentage), stars)
53
57
  end
54
58
  end
55
59
 
56
- def mean(array)
57
- array.reduce(:+) / array.length
58
- end
59
-
60
- def median(array)
61
- array[array.length / 2]
62
- end
63
-
64
- def mode(array)
65
- frequency = array.inject(Hash.new(0)) { |h,v| h[v] += 1; h }
66
-
67
- array.max_by { |v| frequency[v] }
68
- end
69
-
70
60
  end
71
61
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: to_histogram
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.8
4
+ version: 1.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brett Sykes
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-07 00:00:00.000000000 Z
11
+ date: 2015-11-08 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: 'Adds #to_histogram to Array. Returns a histogram distribution object
14
14
  from an Array and optionally prints detailed info to stdout.'
@@ -17,6 +17,7 @@ executables: []
17
17
  extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
+ - lib/averages.rb
20
21
  - lib/bucketizer.rb
21
22
  - lib/histogram.rb
22
23
  - lib/stdout_print.rb