to_histogram 1.0.2 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/bucketizer.rb +47 -26
- data/lib/histogram.rb +4 -2
- data/lib/stdout_print.rb +8 -3
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bc64fe5ed589d7281499abede70e628d1d185fff
|
4
|
+
data.tar.gz: d0ed5fa267e3328cfd72fba7625ad500cfebc521
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8062a66b610ee50b52c3053c4fb514cdb8edb70226b51500df6918e72dca7ad7457ca37e2826c0c73a0d2bb299390b6ff7481883f77e9114fbb0bf70c9e25dea
|
7
|
+
data.tar.gz: b4ad12cd120b69b86ce20ce032c03bb3e1c0c8112ac10f1783519ffe42bf176d3a0969cb47a0eeea24a869fc5145d20fe783ae4a949eed3fbd1e77e36b791b3c
|
data/lib/bucketizer.rb
CHANGED
@@ -1,35 +1,56 @@
|
|
1
1
|
module ToHistogram
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
l_index = i
|
23
|
-
next_bucket += bucket_increments
|
24
|
-
end
|
25
|
-
end
|
3
|
+
class Bucketizer
|
4
|
+
|
5
|
+
def initialize(array, num_buckets)
|
6
|
+
@arr = array.sort
|
7
|
+
@num_buckets = num_buckets
|
8
|
+
@bucket_increments = get_bucket_increment
|
9
|
+
end
|
10
|
+
attr_reader :bucket_increments
|
11
|
+
|
12
|
+
def create_buckets
|
13
|
+
l_index = 0
|
14
|
+
next_bucket = (@bucket_increments == 1 && @arr[0] == 0) ? 0 : @bucket_increments
|
15
|
+
buckets = []
|
16
|
+
|
17
|
+
@arr.each_with_index do |e, i|
|
18
|
+
break if buckets.length == (@num_buckets - 1)
|
19
|
+
|
20
|
+
if !(e <= next_bucket)
|
21
|
+
buckets << @arr[l_index..(i - 1)]
|
26
22
|
|
27
|
-
|
28
|
-
|
23
|
+
# Special case where all of the results fit into the first bucket
|
24
|
+
if buckets[0].length == @arr.length
|
25
|
+
l_index = (@arr.length)
|
26
|
+
break
|
29
27
|
end
|
30
28
|
|
31
|
-
|
29
|
+
l_index = i
|
30
|
+
next_bucket += @bucket_increments
|
32
31
|
end
|
32
|
+
end
|
33
33
|
|
34
|
+
# Stuff the remainder into the last bucket
|
35
|
+
if(l_index <= (@arr.length - 1))
|
36
|
+
buckets << @arr[l_index..(@arr.length - 1)]
|
37
|
+
end
|
38
|
+
|
39
|
+
return buckets
|
34
40
|
end
|
41
|
+
|
42
|
+
private
|
43
|
+
def get_bucket_increment
|
44
|
+
if(@arr.length == 0)
|
45
|
+
return 0
|
46
|
+
elsif(@arr.length <= @num_buckets)
|
47
|
+
increment = ((@arr[-1] - @arr[0]) / @num_buckets)
|
48
|
+
else
|
49
|
+
increment = ((@arr[(@arr.length * 0.9).to_i - 1] - @arr[(@arr.length * 0.1).to_i - 1]) / @num_buckets.to_f).ceil
|
50
|
+
end
|
51
|
+
|
52
|
+
return increment
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
35
56
|
end
|
data/lib/histogram.rb
CHANGED
@@ -6,9 +6,11 @@ module ToHistogram
|
|
6
6
|
include Enumerable
|
7
7
|
|
8
8
|
def initialize(array, num_buckets)
|
9
|
-
bucketizer = Bucketizer.new()
|
10
|
-
@buckets = bucketizer.create_buckets
|
9
|
+
bucketizer = Bucketizer.new(array, num_buckets)
|
10
|
+
@buckets = bucketizer.create_buckets
|
11
|
+
@increments = bucketizer.bucket_increments
|
11
12
|
end
|
13
|
+
attr_reader :increments
|
12
14
|
|
13
15
|
def each(&block)
|
14
16
|
@buckets.each do |b|
|
data/lib/stdout_print.rb
CHANGED
@@ -6,17 +6,22 @@ module ToHistogram
|
|
6
6
|
end
|
7
7
|
|
8
8
|
def invoke
|
9
|
-
last_value = nil
|
10
9
|
total_data_value_length = (@histogram.map { |b| b.length }).reduce(:+)
|
11
10
|
printf("%-20s %-20s %-30s %-20s \n\n", "range", "frequency", " percentage (out of #{total_data_value_length})", "histogram (percetage)")
|
12
11
|
|
13
|
-
@histogram.
|
14
|
-
range =
|
12
|
+
@histogram.each_with_index do |b, i|
|
13
|
+
range = "#{b[0]} to #{b[-1]}"
|
15
14
|
frequency = b.length
|
16
15
|
percentage = ((frequency.to_f / total_data_value_length) * 100)
|
17
16
|
stars = ''
|
18
17
|
percentage.round.times { |x| stars << '*' }
|
19
18
|
|
19
|
+
if(i == (@histogram.length - 1))
|
20
|
+
if(b[-1] - b[0] != 0 && (b[-1] - b[0] > @histogram.increments))
|
21
|
+
range = "> than #{b[0]}"
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
20
25
|
printf("%-20s | %-20s | %-30s | %-20s \n", range, frequency, ('%.4f' % percentage), stars)
|
21
26
|
last_value = b[-1]
|
22
27
|
end
|