buncher 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/buncher.rb +8 -10
- data/test/test_buncher.rb +5 -5
- metadata +1 -1
data/lib/buncher.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'buncher/buncher'
|
2
2
|
module Buncher
|
3
|
-
VERSION = "1.0.2"
|
3
|
+
VERSION = "1.0.3"
|
4
4
|
# your cluster needs to look like this. Make a bunch of them and pass them in. It's ok to pass in empty elements to start.
|
5
5
|
class Cluster
|
6
6
|
attr_accessor :elements
|
@@ -60,14 +60,13 @@ module Buncher
|
|
60
60
|
|
61
61
|
# run the clustering algorithm until have calculated the current number of clusters, taken from this paper:
|
62
62
|
# http://papers.nips.cc/paper/2526-learning-the-k-in-k-means.pdf
|
63
|
-
def self.cluster(elements, weights)
|
64
|
-
changed=true
|
65
|
-
round=0
|
63
|
+
def self.cluster(elements, weights,options={})
|
66
64
|
solutions={}
|
67
|
-
|
65
|
+
min_size=options[:min_size] || 1
|
66
|
+
# try all the sizes of clusters up to #elements. Ok, sure we could probably do something like 50% .. ok, I did
|
68
67
|
# that.
|
69
68
|
not_clustered = last_sK = last_aK =last_fK=nil
|
70
|
-
max_clusters=[
|
69
|
+
max_clusters=[min_size,(elements.size/2).floor].max
|
71
70
|
(1..max_clusters).each do |number_clusters|
|
72
71
|
initial_centers = choose_centers(elements, weights, number_clusters) # C++ Native code
|
73
72
|
centers = initial_centers.map(&:dup)
|
@@ -76,13 +75,12 @@ module Buncher
|
|
76
75
|
not_clustered ||=centers
|
77
76
|
last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK,weights)
|
78
77
|
puts "summary #{number_clusters}: fK() = #{last_fK}, last_sK=#{last_sK} last_aK=#{last_aK} "
|
79
|
-
|
80
|
-
solutions[last_fK]=centers
|
78
|
+
solutions[last_fK]=centers if number_clusters >= min_size
|
81
79
|
# break if number_clusters == 2 ## debugging
|
82
80
|
end
|
83
|
-
min_fK =solutions.keys.sort.first
|
81
|
+
min_fK =solutions.keys.sort.first || 1.0
|
84
82
|
if min_fK > 0.85
|
85
|
-
|
83
|
+
nil # ie, not clustered at all
|
86
84
|
else
|
87
85
|
solutions[min_fK]
|
88
86
|
end
|
data/test/test_buncher.rb
CHANGED
@@ -12,7 +12,7 @@ def init_data(number_points, number_clusters)
|
|
12
12
|
elements=[]
|
13
13
|
extra=0
|
14
14
|
index=0
|
15
|
-
seed=[[0,1],[0,0],[1,0]]
|
15
|
+
seed=[[0,1],[0,0],[1,0],[1,1]]
|
16
16
|
number_clusters.times do
|
17
17
|
# gens = [Rubystats::NormalDistribution.new(rand(0..1), 0.05), Rubystats::NormalDistribution.new(rand(0..1), 0.05)]
|
18
18
|
gens = [Rubystats::NormalDistribution.new(seed[index][0], 0.05), Rubystats::NormalDistribution.new(seed[index][1], 0.05)]
|
@@ -94,11 +94,11 @@ class TestBuncher < Minitest::Test
|
|
94
94
|
assert_equal(new_centers.size,3)
|
95
95
|
end
|
96
96
|
# EOS
|
97
|
-
def
|
98
|
-
elements =
|
99
|
-
new_centers = Buncher::cluster(elements,[1]*2)
|
97
|
+
def test_min_size_works
|
98
|
+
elements = init_data(100,4)
|
99
|
+
new_centers = Buncher::cluster(elements,[1]*2,:min_size=>2)
|
100
100
|
dump(new_centers)
|
101
|
-
assert_equal(new_centers.size,
|
101
|
+
assert_equal(new_centers.size,4)
|
102
102
|
end
|
103
103
|
|
104
104
|
# WWW=<<-'EOS'
|