to_histogram 1.0.2 → 1.0.4
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bucketizer.rb +47 -26
- data/lib/histogram.rb +4 -2
- data/lib/stdout_print.rb +8 -3
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bc64fe5ed589d7281499abede70e628d1d185fff
|
4
|
+
data.tar.gz: d0ed5fa267e3328cfd72fba7625ad500cfebc521
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8062a66b610ee50b52c3053c4fb514cdb8edb70226b51500df6918e72dca7ad7457ca37e2826c0c73a0d2bb299390b6ff7481883f77e9114fbb0bf70c9e25dea
|
7
|
+
data.tar.gz: b4ad12cd120b69b86ce20ce032c03bb3e1c0c8112ac10f1783519ffe42bf176d3a0969cb47a0eeea24a869fc5145d20fe783ae4a949eed3fbd1e77e36b791b3c
|
data/lib/bucketizer.rb
CHANGED
@@ -1,35 +1,56 @@
|
|
1
1
|
module ToHistogram
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
l_index = i
|
23
|
-
next_bucket += bucket_increments
|
24
|
-
end
|
25
|
-
end
|
3
|
+
class Bucketizer
|
4
|
+
|
5
|
+
def initialize(array, num_buckets)
|
6
|
+
@arr = array.sort
|
7
|
+
@num_buckets = num_buckets
|
8
|
+
@bucket_increments = get_bucket_increment
|
9
|
+
end
|
10
|
+
attr_reader :bucket_increments
|
11
|
+
|
12
|
+
def create_buckets
|
13
|
+
l_index = 0
|
14
|
+
next_bucket = (@bucket_increments == 1 && @arr[0] == 0) ? 0 : @bucket_increments
|
15
|
+
buckets = []
|
16
|
+
|
17
|
+
@arr.each_with_index do |e, i|
|
18
|
+
break if buckets.length == (@num_buckets - 1)
|
19
|
+
|
20
|
+
if !(e <= next_bucket)
|
21
|
+
buckets << @arr[l_index..(i - 1)]
|
26
22
|
|
27
|
-
|
28
|
-
|
23
|
+
# Special case where all of the results fit into the first bucket
|
24
|
+
if buckets[0].length == @arr.length
|
25
|
+
l_index = (@arr.length)
|
26
|
+
break
|
29
27
|
end
|
30
28
|
|
31
|
-
|
29
|
+
l_index = i
|
30
|
+
next_bucket += @bucket_increments
|
32
31
|
end
|
32
|
+
end
|
33
33
|
|
34
|
+
# Stuff the remainder into the last bucket
|
35
|
+
if(l_index <= (@arr.length - 1))
|
36
|
+
buckets << @arr[l_index..(@arr.length - 1)]
|
37
|
+
end
|
38
|
+
|
39
|
+
return buckets
|
34
40
|
end
|
41
|
+
|
42
|
+
private
|
43
|
+
def get_bucket_increment
|
44
|
+
if(@arr.length == 0)
|
45
|
+
return 0
|
46
|
+
elsif(@arr.length <= @num_buckets)
|
47
|
+
increment = ((@arr[-1] - @arr[0]) / @num_buckets)
|
48
|
+
else
|
49
|
+
increment = ((@arr[(@arr.length * 0.9).to_i - 1] - @arr[(@arr.length * 0.1).to_i - 1]) / @num_buckets.to_f).ceil
|
50
|
+
end
|
51
|
+
|
52
|
+
return increment
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
35
56
|
end
|
data/lib/histogram.rb
CHANGED
@@ -6,9 +6,11 @@ module ToHistogram
|
|
6
6
|
include Enumerable
|
7
7
|
|
8
8
|
def initialize(array, num_buckets)
|
9
|
-
bucketizer = Bucketizer.new()
|
10
|
-
@buckets = bucketizer.create_buckets
|
9
|
+
bucketizer = Bucketizer.new(array, num_buckets)
|
10
|
+
@buckets = bucketizer.create_buckets
|
11
|
+
@increments = bucketizer.bucket_increments
|
11
12
|
end
|
13
|
+
attr_reader :increments
|
12
14
|
|
13
15
|
def each(&block)
|
14
16
|
@buckets.each do |b|
|
data/lib/stdout_print.rb
CHANGED
@@ -6,17 +6,22 @@ module ToHistogram
|
|
6
6
|
end
|
7
7
|
|
8
8
|
def invoke
|
9
|
-
last_value = nil
|
10
9
|
total_data_value_length = (@histogram.map { |b| b.length }).reduce(:+)
|
11
10
|
printf("%-20s %-20s %-30s %-20s \n\n", "range", "frequency", " percentage (out of #{total_data_value_length})", "histogram (percetage)")
|
12
11
|
|
13
|
-
@histogram.
|
14
|
-
range =
|
12
|
+
@histogram.each_with_index do |b, i|
|
13
|
+
range = "#{b[0]} to #{b[-1]}"
|
15
14
|
frequency = b.length
|
16
15
|
percentage = ((frequency.to_f / total_data_value_length) * 100)
|
17
16
|
stars = ''
|
18
17
|
percentage.round.times { |x| stars << '*' }
|
19
18
|
|
19
|
+
if(i == (@histogram.length - 1))
|
20
|
+
if(b[-1] - b[0] != 0 && (b[-1] - b[0] > @histogram.increments))
|
21
|
+
range = "> than #{b[0]}"
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
20
25
|
printf("%-20s | %-20s | %-30s | %-20s \n", range, frequency, ('%.4f' % percentage), stars)
|
21
26
|
last_value = b[-1]
|
22
27
|
end
|