buncher 1.0.3 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/buncher.rb +4 -4
- data/lib/buncher/buncher.bundle +0 -0
- data/test/test_buncher.rb +27 -23
- metadata +2 -2
data/lib/buncher.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'buncher/buncher'
|
2
2
|
module Buncher
|
3
|
-
VERSION = "1.0.3"
|
3
|
+
VERSION = "1.0.4"
|
4
4
|
# your cluster needs to look like this. Make a bunch of them and pass them in. It's ok to pass in empty elements to start.
|
5
5
|
class Cluster
|
6
6
|
attr_accessor :elements
|
@@ -58,7 +58,7 @@ module Buncher
|
|
58
58
|
end
|
59
59
|
end
|
60
60
|
|
61
|
-
# run the clustering algorithm until have calculated the
|
61
|
+
# run the clustering algorithm until we have calculated the best number of clusters, taken from this paper:
|
62
62
|
# http://papers.nips.cc/paper/2526-learning-the-k-in-k-means.pdf
|
63
63
|
def self.cluster(elements, weights,options={})
|
64
64
|
solutions={}
|
@@ -76,11 +76,11 @@ module Buncher
|
|
76
76
|
last_fK, last_sK, last_aK = fK(centers,last_sK, last_aK,weights)
|
77
77
|
puts "summary #{number_clusters}: fK() = #{last_fK}, last_sK=#{last_sK} last_aK=#{last_aK} "
|
78
78
|
solutions[last_fK]=centers if number_clusters >= min_size
|
79
|
-
# break if number_clusters ==
|
79
|
+
# break if number_clusters == 3 ## debugging
|
80
80
|
end
|
81
81
|
min_fK =solutions.keys.sort.first || 1.0
|
82
82
|
if min_fK > 0.85
|
83
|
-
|
83
|
+
elements.map {|ele| Cluster.new(ele,[ele])} # ie, not clustered at all
|
84
84
|
else
|
85
85
|
solutions[min_fK]
|
86
86
|
end
|
data/lib/buncher/buncher.bundle
CHANGED
Binary file
|
data/test/test_buncher.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
require "buncher"
|
2
2
|
require 'rubystats'
|
3
3
|
require 'gnuplot'
|
4
|
+
require "minitest/focus"
|
4
5
|
|
5
6
|
def dump(centers)
|
6
|
-
puts "centers are"
|
7
|
-
centers.each {|ccc| puts "center #{ccc.center.inspect} #{ccc.elements[0]}, #{ccc.elements[1]}"}
|
7
|
+
puts "#{centers.size} centers are"
|
8
|
+
centers.each {|ccc| puts "center #{ccc.center.inspect} elements #{ccc.elements[0]}, #{ccc.elements[1]}"}
|
8
9
|
end
|
9
10
|
|
10
11
|
def init_data(number_points, number_clusters)
|
@@ -64,13 +65,19 @@ def plot(file_name,points,centers, initial_centers=nil)
|
|
64
65
|
end
|
65
66
|
|
66
67
|
class TestBuncher < Minitest::Test
|
67
|
-
|
68
|
+
|
68
69
|
def test_distance
|
69
70
|
element1= [1.0, 0.5, 0.533333333333333, 1.0, 0.0, 0.0, 0.333333333333333, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.666666666666667, 0.0, 0.0, 1.0, 0.333333333333333, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.0, 0.333333333333333, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333333333333333, 0.333333333333333, 0.0, 0.333333333333333, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.333333333333333, 0.0, 0.666666666666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
|
70
71
|
element2= [1.0, 1.0, 0.5, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
|
71
72
|
weights= [0.12158054711246201, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.060790273556231005, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 
0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.0060790273556231, 0.0060790273556231, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.0121580547112462, 0.0060790273556231, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155, 0.00303951367781155]
|
72
73
|
assert_in_delta(Buncher::distance(element1,element2, weights),0.098,0.001)
|
73
74
|
end
|
75
|
+
def test_distance_nan
|
76
|
+
element1 = [nil, 1, 1]
|
77
|
+
element2 = [nil, nil, 1]
|
78
|
+
weights = [1] * 3
|
79
|
+
assert_in_delta(Buncher::distance(element1,element2, weights),0.5,0.01)
|
80
|
+
end
|
74
81
|
def test_should_find_the_one_center
|
75
82
|
elements = [[1,1]]
|
76
83
|
starting_centers = elements.sample(1).map {|aaa| Buncher::Cluster.new(aaa)}
|
@@ -80,9 +87,13 @@ class TestBuncher < Minitest::Test
|
|
80
87
|
assert_in_delta(new_centers.first.center[1],1.0,0.01)
|
81
88
|
end
|
82
89
|
|
90
|
+
|
83
91
|
def test_choose_centers_wrapper
|
84
92
|
elements = [[1,1]]
|
93
|
+
puts "test_choose_centers_wrapper - start"
|
85
94
|
new_centers = Buncher::choose_centers(elements,[1]*2, 1)
|
95
|
+
puts "test_choose_centers_wrapper - well well"
|
96
|
+
puts new_centers.inspect
|
86
97
|
dump(new_centers)
|
87
98
|
assert_in_delta(new_centers.first.center[1],1.0,0.01)
|
88
99
|
assert_in_delta(new_centers.first.center[1],1.0,0.01)
|
@@ -91,41 +102,34 @@ class TestBuncher < Minitest::Test
|
|
91
102
|
elements = init_data(100,3)
|
92
103
|
new_centers = Buncher::choose_centers(elements, [1]*2, 3)
|
93
104
|
dump(new_centers)
|
94
|
-
assert_equal(new_centers.size
|
105
|
+
assert_equal(3, new_centers.size)
|
95
106
|
end
|
96
|
-
|
107
|
+
|
108
|
+
|
97
109
|
def test_min_size_works
|
98
|
-
elements = init_data(100,
|
110
|
+
elements = init_data(100,1)
|
99
111
|
new_centers = Buncher::cluster(elements,[1]*2,:min_size=>2)
|
100
|
-
|
101
|
-
assert_equal(new_centers.size,4)
|
102
|
-
end
|
103
|
-
|
104
|
-
# WWW=<<-'EOS'
|
105
|
-
|
106
|
-
def test_choose_centers_wrapper2
|
107
|
-
elements = init_data(100,3)
|
108
|
-
new_centers = Buncher::choose_centers(elements, [1]*2, 3)
|
109
|
-
plot("/tmp/kmeans_initial_#{new_centers.size}.png",elements, new_centers)
|
110
|
-
dump(new_centers)
|
111
|
-
`open /tmp/kmeans_initial_#{new_centers.size}.png`
|
112
|
-
assert_equal(new_centers.size,3)
|
112
|
+
assert(new_centers.size >=2,"at least 2 centers")
|
113
113
|
end
|
114
|
-
|
114
|
+
focus
|
115
115
|
def test_gaussian_distribution_of_100_points_in_3_clusters
|
116
|
-
|
116
|
+
1.times do |run|
|
117
117
|
# srand(843284148793854177950180651080082381)
|
118
118
|
elements = init_data(100,3)
|
119
119
|
# elements.each {|eee| puts "#{eee[0]},#{eee[1]}"}
|
120
|
+
graphfile=nil;
|
120
121
|
# new_centers = Buncher::cluster(elements) {|elements,centers, initial_centers| puts "run #{run} setup";plot("/tmp/#{run}_centers_#{centers.size}.png",elements,initial_centers)}
|
121
122
|
new_centers = Buncher::cluster(elements,[1]*2) {|elements,centers, initial_centers|
|
122
|
-
|
123
|
+
graphfile = "/tmp/#{run}_centers_#{centers.size}.png"
|
124
|
+
plot(graphfile,elements,centers, initial_centers)
|
123
125
|
}
|
124
126
|
puts "run #{run}: k is #{new_centers.size}, seed was #{srand}"
|
125
127
|
puts "ERROR "*4 if new_centers.size != 3
|
126
128
|
puts
|
127
129
|
assert_equal(3,new_centers.size)
|
130
|
+
refute(new_centers.first.elements.empty?)
|
131
|
+
graphfile = "/tmp/#{run}_centers_3.png"
|
132
|
+
`open #{graphfile}`
|
128
133
|
end
|
129
134
|
end
|
130
|
-
# EOS
|
131
135
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: buncher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-04-
|
12
|
+
date: 2015-04-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: minitest
|