buncher 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/buncher.rb +8 -10
- data/test/test_buncher.rb +5 -5
- metadata +1 -1
data/lib/buncher.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'buncher/buncher'
|
2
2
|
module Buncher
|
3
|
-
VERSION = "1.0.2"
|
3
|
+
VERSION = "1.0.3"
|
4
4
|
# your cluster needs to look like this. Make a bunch of them and pass them in. It's ok to pass in empty elements to start.
|
5
5
|
class Cluster
|
6
6
|
attr_accessor :elements
|
@@ -60,14 +60,13 @@ module Buncher
|
|
60
60
|
|
61
61
|
# run the clustering algorithm until have calculated the current number of clusters, taken from this paper:
|
62
62
|
# http://papers.nips.cc/paper/2526-learning-the-k-in-k-means.pdf
|
63
|
-
def self.cluster(elements, weights)
|
64
|
-
changed=true
|
65
|
-
round=0
|
63
|
+
def self.cluster(elements, weights,options={})
|
66
64
|
solutions={}
|
67
|
-
|
65
|
+
min_size=options[:min_size] || 1
|
66
|
+
# try all the sizes of clusters up to #elements. Ok, sure we could probably do something like 50% .. ok, I did
|
68
67
|
# that.
|
69
68
|
not_clustered = last_sK = last_aK =last_fK=nil
|
70
|
-
max_clusters=[
|
69
|
+
max_clusters=[min_size,(elements.size/2).floor].max
|
71
70
|
(1..max_clusters).each do |number_clusters|
|
72
71
|
initial_centers = choose_centers(elements, weights, number_clusters) # C++ Native code
|
73
72
|
centers = initial_centers.map(&:dup)
|
@@ -76,13 +75,12 @@ module Buncher
|
|
76
75
|
not_clustered ||=centers
|
77
76
|
last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK,weights)
|
78
77
|
puts "summary #{number_clusters}: fK() = #{last_fK}, last_sK=#{last_sK} last_aK=#{last_aK} "
|
79
|
-
|
80
|
-
solutions[last_fK]=centers
|
78
|
+
solutions[last_fK]=centers if number_clusters >= min_size
|
81
79
|
# break if number_clusters == 2 ## debugging
|
82
80
|
end
|
83
|
-
min_fK =solutions.keys.sort.first
|
81
|
+
min_fK =solutions.keys.sort.first || 1.0
|
84
82
|
if min_fK > 0.85
|
85
|
-
|
83
|
+
nil # ie, not clustered at all
|
86
84
|
else
|
87
85
|
solutions[min_fK]
|
88
86
|
end
|
data/test/test_buncher.rb
CHANGED
@@ -12,7 +12,7 @@ def init_data(number_points, number_clusters)
|
|
12
12
|
elements=[]
|
13
13
|
extra=0
|
14
14
|
index=0
|
15
|
-
seed=[[0,1],[0,0],[1,0]]
|
15
|
+
seed=[[0,1],[0,0],[1,0],[1,1]]
|
16
16
|
number_clusters.times do
|
17
17
|
# gens = [Rubystats::NormalDistribution.new(rand(0..1), 0.05), Rubystats::NormalDistribution.new(rand(0..1), 0.05)]
|
18
18
|
gens = [Rubystats::NormalDistribution.new(seed[index][0], 0.05), Rubystats::NormalDistribution.new(seed[index][1], 0.05)]
|
@@ -94,11 +94,11 @@ class TestBuncher < Minitest::Test
|
|
94
94
|
assert_equal(new_centers.size,3)
|
95
95
|
end
|
96
96
|
# EOS
|
97
|
-
def
|
98
|
-
elements =
|
99
|
-
new_centers = Buncher::cluster(elements,[1]*2)
|
97
|
+
def test_min_size_works
|
98
|
+
elements = init_data(100,4)
|
99
|
+
new_centers = Buncher::cluster(elements,[1]*2,:min_size=>2)
|
100
100
|
dump(new_centers)
|
101
|
-
assert_equal(new_centers.size,
|
101
|
+
assert_equal(new_centers.size,4)
|
102
102
|
end
|
103
103
|
|
104
104
|
# WWW=<<-'EOS'
|